publisci 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (220)
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +13 -0
  5. data/Gemfile +36 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +51 -0
  8. data/README.rdoc +48 -0
  9. data/Rakefile +68 -0
  10. data/bin/bio-publisci +106 -0
  11. data/bin/bio-publisci-server +50 -0
  12. data/examples/bio-band_integration.rb +9 -0
  13. data/examples/no_magic.prov +58 -0
  14. data/examples/no_magic.rb +58 -0
  15. data/examples/orm.prov +48 -0
  16. data/examples/primer-full.prov +120 -0
  17. data/examples/primer.prov +66 -0
  18. data/examples/prov_dsl.prov +85 -0
  19. data/examples/safe_gen.rb +7 -0
  20. data/examples/visualization/primer.prov +66 -0
  21. data/examples/visualization/prov_viz.rb +140 -0
  22. data/examples/visualization/viz.rb +35 -0
  23. data/features/create_generator.feature +21 -0
  24. data/features/integration.feature +12 -0
  25. data/features/integration_steps.rb +10 -0
  26. data/features/metadata.feature +37 -0
  27. data/features/metadata_steps.rb +40 -0
  28. data/features/orm.feature +60 -0
  29. data/features/orm_steps.rb +74 -0
  30. data/features/prov_dsl.feature +14 -0
  31. data/features/prov_dsl_steps.rb +11 -0
  32. data/features/reader.feature +25 -0
  33. data/features/reader_steps.rb +61 -0
  34. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  35. data/features/store.feature +27 -0
  36. data/features/store_steps.rb +42 -0
  37. data/features/support/env.rb +13 -0
  38. data/features/writer.feature +14 -0
  39. data/features/writer_steps.rb +24 -0
  40. data/lib/bio-publisci.rb +64 -0
  41. data/lib/bio-publisci/analyzer.rb +57 -0
  42. data/lib/bio-publisci/datacube_model.rb +111 -0
  43. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
  44. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  45. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  46. data/lib/bio-publisci/dataset/data_cube.rb +418 -0
  47. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  48. data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
  49. data/lib/bio-publisci/dataset/interactive.rb +72 -0
  50. data/lib/bio-publisci/dsl/config.rb +34 -0
  51. data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
  52. data/lib/bio-publisci/dsl/dsl.rb +72 -0
  53. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  54. data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
  55. data/lib/bio-publisci/metadata/generator.rb +323 -0
  56. data/lib/bio-publisci/metadata/metadata.rb +5 -0
  57. data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
  58. data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
  59. data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
  60. data/lib/bio-publisci/metadata/prov/association.rb +107 -0
  61. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  62. data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
  63. data/lib/bio-publisci/metadata/prov/element.rb +120 -0
  64. data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
  65. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
  66. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  67. data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
  68. data/lib/bio-publisci/metadata/prov/role.rb +40 -0
  69. data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
  70. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  71. data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
  72. data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
  73. data/lib/bio-publisci/mixins/registry.rb +27 -0
  74. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  75. data/lib/bio-publisci/output.rb +27 -0
  76. data/lib/bio-publisci/parser.rb +266 -0
  77. data/lib/bio-publisci/post_processor.rb +95 -0
  78. data/lib/bio-publisci/query/query_helper.rb +123 -0
  79. data/lib/bio-publisci/r_client.rb +54 -0
  80. data/lib/bio-publisci/readers/arff.rb +49 -0
  81. data/lib/bio-publisci/readers/base.rb +57 -0
  82. data/lib/bio-publisci/readers/csv.rb +88 -0
  83. data/lib/bio-publisci/readers/dataframe.rb +67 -0
  84. data/lib/bio-publisci/readers/maf.rb +199 -0
  85. data/lib/bio-publisci/readers/r_cross.rb +112 -0
  86. data/lib/bio-publisci/readers/r_matrix.rb +176 -0
  87. data/lib/bio-publisci/store.rb +56 -0
  88. data/lib/bio-publisci/writers/arff.rb +91 -0
  89. data/lib/bio-publisci/writers/base.rb +93 -0
  90. data/lib/bio-publisci/writers/csv.rb +31 -0
  91. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  92. data/lib/bio-publisci/writers/json.rb +18 -0
  93. data/lib/r2rdf.rb +226 -0
  94. data/lib/template_bak.rb +12 -0
  95. data/lib/template_bak/publisci.rb +3 -0
  96. data/lib/vocabs/cc.rb +18 -0
  97. data/lib/vocabs/cert.rb +13 -0
  98. data/lib/vocabs/dc.rb +63 -0
  99. data/lib/vocabs/dc11.rb +23 -0
  100. data/lib/vocabs/doap.rb +45 -0
  101. data/lib/vocabs/exif.rb +168 -0
  102. data/lib/vocabs/foaf.rb +69 -0
  103. data/lib/vocabs/geo.rb +13 -0
  104. data/lib/vocabs/http.rb +26 -0
  105. data/lib/vocabs/ma.rb +78 -0
  106. data/lib/vocabs/owl.rb +59 -0
  107. data/lib/vocabs/rdfs.rb +17 -0
  108. data/lib/vocabs/rsa.rb +12 -0
  109. data/lib/vocabs/rss.rb +14 -0
  110. data/lib/vocabs/sioc.rb +93 -0
  111. data/lib/vocabs/skos.rb +36 -0
  112. data/lib/vocabs/wot.rb +21 -0
  113. data/lib/vocabs/xhtml.rb +9 -0
  114. data/lib/vocabs/xsd.rb +58 -0
  115. data/resources/maf_example.maf +10 -0
  116. data/resources/maf_rdf.ttl +1173 -0
  117. data/resources/primer.ttl +38 -0
  118. data/resources/queries/code_resources.rq +10 -0
  119. data/resources/queries/codes.rq +18 -0
  120. data/resources/queries/dataset.rq +7 -0
  121. data/resources/queries/dimension_ranges.rq +8 -0
  122. data/resources/queries/dimensions.rq +12 -0
  123. data/resources/queries/gene.rq +16 -0
  124. data/resources/queries/hugo_to_ensembl.rq +7 -0
  125. data/resources/queries/maf_column.rq +26 -0
  126. data/resources/queries/measures.rq +12 -0
  127. data/resources/queries/observation_labels.rq +8 -0
  128. data/resources/queries/observations.rq +13 -0
  129. data/resources/queries/patient.rq +11 -0
  130. data/resources/queries/patient_list.rq +11 -0
  131. data/resources/queries/patients_with_mutation.rq +18 -0
  132. data/resources/queries/properties.rq +8 -0
  133. data/resources/queries/test.rq +3 -0
  134. data/resources/weather.numeric.arff +28 -0
  135. data/scripts/get_gene_lengths.rb +50 -0
  136. data/scripts/islet_mlratio.rb +6 -0
  137. data/scripts/scan_islet.rb +6 -0
  138. data/scripts/update_reference.rb +25 -0
  139. data/server/helpers.rb +215 -0
  140. data/server/public/src-min-noconflict/LICENSE +24 -0
  141. data/server/public/src-min-noconflict/ace.js +11 -0
  142. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  143. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  144. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  145. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  146. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  147. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  148. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  149. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  150. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  151. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  152. data/server/public/src-min-noconflict/ext-split.js +1 -0
  153. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  154. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  155. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  156. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  157. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  158. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  159. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  160. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  161. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  162. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  163. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  164. data/server/public/src-min-noconflict/worker-css.js +1 -0
  165. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  166. data/server/public/src-min-noconflict/worker-json.js +1 -0
  167. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  168. data/server/public/src-min-noconflict/worker-php.js +1 -0
  169. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  170. data/server/routes.rb +123 -0
  171. data/server/views/dsl.haml +65 -0
  172. data/server/views/dump.haml +3 -0
  173. data/server/views/import.haml +35 -0
  174. data/server/views/new_repository.haml +25 -0
  175. data/server/views/query.haml +28 -0
  176. data/server/views/repository.haml +25 -0
  177. data/spec/ORM/data_cube_orm_spec.rb +33 -0
  178. data/spec/ORM/prov_model_spec.rb +72 -0
  179. data/spec/analyzer_spec.rb +36 -0
  180. data/spec/bnode_spec.rb +66 -0
  181. data/spec/csv/bacon.csv +4 -0
  182. data/spec/csv/moar_bacon.csv +11 -0
  183. data/spec/data_cube_spec.rb +169 -0
  184. data/spec/dataset_for_spec.rb +77 -0
  185. data/spec/dsl_spec.rb +134 -0
  186. data/spec/generators/csv_spec.rb +44 -0
  187. data/spec/generators/dataframe_spec.rb +44 -0
  188. data/spec/generators/maf_spec.rb +40 -0
  189. data/spec/generators/r_cross_spec.rb +51 -0
  190. data/spec/generators/r_matrix_spec.rb +44 -0
  191. data/spec/length_lookup_spec.rb +0 -0
  192. data/spec/maf_query_spec.rb +343 -0
  193. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  194. data/spec/prov/activity_spec.rb +74 -0
  195. data/spec/prov/agent_spec.rb +54 -0
  196. data/spec/prov/association_spec.rb +55 -0
  197. data/spec/prov/config_spec.rb +28 -0
  198. data/spec/prov/derivation_spec.rb +30 -0
  199. data/spec/prov/entity_spec.rb +52 -0
  200. data/spec/prov/role_spec.rb +94 -0
  201. data/spec/prov/usage_spec.rb +98 -0
  202. data/spec/queries/integrity/1.rq +21 -0
  203. data/spec/queries/integrity/11.rq +29 -0
  204. data/spec/queries/integrity/12.rq +37 -0
  205. data/spec/queries/integrity/14.rq +25 -0
  206. data/spec/queries/integrity/19_1.rq +21 -0
  207. data/spec/queries/integrity/19_2.rq +15 -0
  208. data/spec/queries/integrity/2.rq +22 -0
  209. data/spec/queries/integrity/3.rq +19 -0
  210. data/spec/queries/integrity/4.rq +13 -0
  211. data/spec/queries/integrity/5.rq +14 -0
  212. data/spec/r_builder_spec.rb +33 -0
  213. data/spec/resource/.RData +0 -0
  214. data/spec/resource/example.Rhistory +3 -0
  215. data/spec/spec_helper.rb +17 -0
  216. data/spec/turtle/bacon +147 -0
  217. data/spec/turtle/reference +2064 -0
  218. data/spec/turtle/weather +275 -0
  219. data/spec/writer_spec.rb +75 -0
  220. metadata +589 -0
@@ -0,0 +1,240 @@
1
+ module PubliSci
2
+ module DataSet
3
+ module ORM
4
+ class DataCube
5
+ extend PubliSci::Dataset::DataCube
6
+ extend PubliSci::Analyzer
7
+ extend PubliSci::Query
8
+ extend PubliSci::Parser
9
+
10
+ include PubliSci::Dataset::DataCube
11
+ include PubliSci::Analyzer
12
+ include PubliSci::Metadata::Generator
13
+ include PubliSci::Query
14
+ include PubliSci::Parser
15
+
16
+ attr_accessor :labels
17
+ attr_accessor :dimensions
18
+ attr_accessor :measures
19
+ attr_accessor :obs
20
+ attr_accessor :meta
21
+
22
# Starts the cube with no dimensions, measures or observations and with empty
# generator options, runtime options and metadata.
#
# @param options [Hash] settings forwarded to parse_options
# @param do_parse [Boolean] pass a falsy value to skip parse_options entirely
def initialize(options={}, do_parse = true)
  @dimensions, @measures, @obs = {}, [], []
  @generator_options, @options, @meta = {}, {}, {}

  parse_options(options) if do_parse
end
33
+
34
# Builds a cube from an existing graph by running the bundled query files
# (dimension ranges, code resources, measures, dataset, observations and
# observation labels) against it.
#
# @param graph [String] handed to load_string first unless it matches /^http/
# @param options [Hash] extra options; values read from the graph are merged
#   over them, so the graph wins on key collisions
# @param verbose [Boolean] when truthy every intermediate result is printed
# @return [Object] the result of calling new with the merged options
def self.load(graph, options={}, verbose=false)
  graph = load_string(graph) unless graph =~ /^http/

  # TODO coded properties should be found via SPARQL queries
  range_rows = get_hashes(execute_from_file('dimension_ranges.rq', graph), "to_s")
  dimension_pairs = range_rows.map do |solution|
    coded = solution[:range].split('/')[-2] == "code"
    [solution[:dimension], {type: coded ? :coded : solution[:range].to_s}]
  end
  dimensions = Hash[dimension_pairs]
  puts "dimensions: #{dimensions}" if verbose

  code_rows = execute_from_file('code_resources.rq', graph).to_h
  codes = code_rows.map do |sol|
    [:dimension, :codeList, :class].map { |field| sol[field].to_s }
  end
  puts "codes: #{codes}" if verbose

  measure_rows = execute_from_file('measures.rq', graph).to_h
  measures = measure_rows.map { |row| row[:measure].to_s }
  puts "measures: #{measures}" if verbose

  name = execute_from_file('dataset.rq', graph).to_h.first[:label]
  puts "dataset: #{name}" if verbose

  observations = observation_hash(execute_from_file('observations.rq', graph))
  puts "observations: #{observations}" if verbose

  # Keyed by observation so that each observation contributes one label.
  label_rows = execute_from_file('observation_labels.rq', graph)
  labels = Hash[label_rows.map { |sol| [sol[:observation].to_s, sol[:label].to_s] }]

  options = options.merge(
    measures: measures,
    dimensions: dimensions,
    observations: observations.values,
    name: name,
    labels: labels.values,
    codes: codes
  )
  puts "creating #{options}" if verbose
  new(options)
end
86
+
87
# Copies the recognised entries of +options+ into the cube's state.
#
# Dimensions, measures and observations are registered first; the name check
# only happens after that, so a missing name raises once those have already
# been added.
#
# @param options [Hash] may contain :dimensions, :measures, :observations,
#   :generator_options, :skip_metadata, :name, :validate_each, :labels, :codes
# @raise [RuntimeError] when options[:name] is missing
def parse_options(options)
  if (dimension_specs = options[:dimensions])
    dimension_specs.each do |name, details|
      add_dimension(name, details[:type] || :coded)
    end
  end

  if (measure_names = options[:measures])
    measure_names.each { |measure| @measures << measure }
  end

  if (rows = options[:observations])
    rows.each { |obs_data| add_observation(obs_data) }
  end

  @generator_options = options[:generator_options] if options[:generator_options]
  @options[:skip_metadata] = options[:skip_metadata] if options[:skip_metadata]

  raise "No dataset name specified!" unless options[:name]
  @name = options[:name]

  @options[:validate_each] = options[:validate_each] if options[:validate_each]
  @labels = options[:labels] if options[:labels]
  @codes = options[:codes] if options[:codes]
end
123
+
124
# Serialises the cube through generate and, unless @options[:skip_metadata]
# is set, appends the output of basic for the cube's metadata fields.
#
# Labels default to "1".."n" unless @labels is already an Array with one entry
# per observation; a Symbol in @labels is left untouched.
#
# @return [String] the generated text
def to_n3
  # create labels if not specified
  labels_ready = @labels.is_a?(Array) && @labels.size == @obs.size
  unless labels_ready || @labels.is_a?(Symbol)
    # a Symbol is reserved for automatic labelling methods, none defined yet
    @labels = (1..@obs.size).map(&:to_s)
  end

  # collect observation data, one column per measure/dimension
  check_integrity(@obs.map(&:data), @dimensions.keys, @measures)
  data = {}
  components = @measures | @dimensions.keys
  @obs.each do |observation|
    components.each do |component|
      (data[component] ||= []) << observation.data[component]
    end
  end

  # Without explicit codes every dimension typed :coded is treated as coded.
  @codes ||= @dimensions.select { |_name, details| details[:type] == :coded }.keys

  str = generate(@measures, @dimensions.keys, @codes, data, @labels, @name, @generator_options)
  return str if @options[:skip_metadata]

  fields = {
    publishers: publishers,
    subject: subjects,
    author: author,
    description: description,
    date: date,
    var: @name,
  }
  str + "\n" + basic(fields)
end
162
+
163
# Registers a dimension under its string name.
#
# @param name [#to_s] dimension name
# @param type [Object] dimension type, :coded by default
# @return [Hash] the stored {type: type} entry
def add_dimension(name, type=:coded)
  @dimensions.store(name.to_s, {type: type})
end

# Appends a measure name.
#
# @param name [Object] measure name
# @return [Array] the measures list
def add_measure(name)
  @measures.push(name)
end

# Wraps a row in an Observation (keys converted to strings) and appends it.
# The row is passed to check_integrity first when @options[:validate_each]
# is set.
#
# @param data [Hash] component name => value
# @return [Array] the observations list
def add_observation(data)
  row = {}
  data.each { |key, value| row[key.to_s] = value }

  observation = Observation.new(row)
  if @options[:validate_each]
    check_integrity([observation.data], @dimensions.keys, @measures)
  end
  @obs.push(observation)
end

# Appends an already constructed observation without any conversion or check.
#
# @param observation [Object] the observation to store
# @return [Array] the observations list
def insert(observation)
  @obs.push(observation)
end
181
+
182
# Publishers recorded in the metadata; starts as an empty list.
#
# @return [Array]
def publishers
  @meta[:publishers] ||= []
end

# Replaces the publisher list.
def publishers=(publishers)
  @meta[:publishers] = publishers
end

# Subjects recorded in the metadata (stored under :subject); starts empty.
#
# @return [Array]
def subjects
  @meta[:subject] ||= []
end

# Replaces the subject list.
def subjects=(subjects)
  @meta[:subject] = subjects
end

# Appends a publisher entry.
#
# @param label [Object] publisher label
# @param uri [Object] publisher URI
# @return [Array] the publisher list
def add_publisher(label, uri)
  publishers << {label: label, uri: uri}
end

# Appends a subject identifier.
#
# @param id [Object] subject identifier
# @return [Array] the subject list
def add_subject(id)
  # Must go through #subjects: there is no `subject` method, so the previous
  # `subject << id` raised NameError on every call.
  subjects << id
end

# Author recorded in the metadata (stored under :creator); "" by default.
def author
  @meta[:creator] ||= ""
end

# Sets the author.
def author=(author)
  @meta[:creator] = author
end

# Free-text description; "" by default.
def description
  @meta[:description] ||= ""
end

# Sets the description.
def description=(description)
  @meta[:description] = description
end

# Date recorded in the metadata; defaults to today as "day-month-year"
# without zero padding.
#
# @return [String]
def date
  @meta[:date] ||= begin
    # Read the clock once so the three parts always belong to the same instant.
    now = Time.now
    "#{now.day}-#{now.month}-#{now.year}"
  end
end

# Sets the date.
def date=(date)
  @meta[:date] = date
end
229
+
230
# Plain-hash snapshot of the cube.
#
# @return [Hash] :measures and :dimensions as stored, plus :observations as
#   the list of each observation's data
def to_h
  observation_rows = @obs.map(&:data)
  {
    measures: @measures,
    dimensions: @dimensions,
    observations: observation_rows
  }
end
237
+ end
238
+ end
239
+ end
240
+ end
@@ -0,0 +1,20 @@
1
module PubliSci
  module DataSet
    module ORM
      # A single row of a data cube: a hash of component name => value.
      #
      # Entries of the data hash can also be read as methods, e.g.
      # `obs.age` returns `obs.data["age"]` (or `obs.data[:age]`).
      class Observation
        attr_accessor :data

        # @param data [Hash] component name => value
        def initialize(data={})
          @data = data
        end

        # Returns the entry of the data hash named after the missing method.
        # Anything that is not a key of the hash, or that is called with
        # arguments, falls through to the normal NoMethodError.
        def method_missing(name, *args, &block)
          key = data_key(name)
          return super if key.nil? || !args.empty?

          @data[key]
        end

        # Keeps respond_to? in step with method_missing.
        def respond_to_missing?(method, include_private = false)
          !data_key(method).nil? || super
        end

        private

        # Finds the key (String first, then Symbol) under which +name+ is
        # stored, or nil when the data hash has no such entry.
        def data_key(name)
          return nil unless @data.respond_to?(:key?)

          [name.to_s, name.to_sym].find { |candidate| @data.key?(candidate) }
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module PubliSci
  class Dataset
    # Settings object with one combined reader/setter plus a plain writer per
    # entry of Configuration.defaults.
    class Configuration
      # @return [Hash] setting name => default value
      def self.defaults
        {
          interactive: false,
        }
      end

      defaults.each do |setting, default|
        ivar = :"@#{setting}"

        # Called without an argument (or with nil) it reads the setting,
        # falling back to the default while nothing has been stored.
        # Called with a value it stores that value first.
        #
        # The checks are nil checks rather than truthiness checks so that
        # `false` is a storable value: `interactive(false)` used to be ignored.
        define_method(setting) do |input=nil|
          if !input.nil?
            instance_variable_set(ivar, input)
          elsif instance_variable_get(ivar).nil?
            instance_variable_set(ivar, default)
          end

          instance_variable_get(ivar)
        end

        attr_writer setting
      end
    end
  end
end
@@ -0,0 +1,418 @@
1
+ #monkey patch to make rdf string w/ heredocs prettier ;)
2
# Monkey patch that makes RDF strings written as indented heredocs prettier.
class String
  # Removes the indentation of the first non-blank line from the start of
  # every line.
  #
  # Only spaces and tabs count as indentation, so leading blank lines are kept
  # and no longer become part of the pattern that is stripped.
  #
  # @return [String] a new string; the receiver is not modified
  def unindent
    # A string with no visible text falls back to its own leading blanks.
    indent = self[/^[ \t]*(?=\S)/] || self[/\A[ \t]*/]
    gsub(/^#{Regexp.escape(indent)}/, '')
  end
end
7
+
8
+ module PubliSci
9
+ class Dataset
10
+ module DataCube
11
+ include PubliSci::Parser
12
# Default generator options; a fresh hash is built on every call.
#
# @return [Hash] :type, :encode_nulls and :base_url
def defaults
  settings = {}
  settings[:type] = :dataframe
  settings[:encode_nulls] = false
  settings[:base_url] = "http://onto.strinz.me"
  settings
end
19
+
20
# Turns measure, dimension and code names into the tokens used in the output.
#
# Measures and dimensions: http:// URLs are wrapped in <>, names of the form
# prefix:local are kept, everything else gets the "prop:" prefix.
# Codes: when the first entry is an Array each entry is kept, with http://
# elements wrapped in <>; otherwise every code becomes a
# [name, "code:name", "code:Name"] triple built from sanitize(code).first.
#
# @param options [Hash] accepted for signature parity; not read here
# @return [Array(Array, Array, Array)] measures, dimensions, codes
def generate_resources(measures, dimensions, codes, options={})
  # NOTE(review): the class below is spelled A-z, which also admits the
  # punctuation between "Z" and "a" (including "_"); kept as found.
  to_property = lambda do |name|
    if name =~ /^http:\/\//
      "<#{name}>"
    elsif name =~ /^[a-zA-z]+:[a-zA-z]+$/
      name
    else
      "prop:#{name}"
    end
  end

  rdf_measures = measures.map { |measure| to_property.call(measure) }
  rdf_dimensions = dimensions.map { |dimension| to_property.call(dimension) }

  rdf_codes =
    if codes.first.is_a?(Array)
      codes.map do |entry|
        entry.map { |element| element =~ /^http:\/\// ? "<#{element}>" : element }
      end
    else
      codes.map do |code|
        base = sanitize(code).first
        ["#{base}", "code:#{base.downcase}", "code:#{base.downcase.capitalize}"]
      end
    end

  [rdf_measures, rdf_dimensions, rdf_codes]
end
60
+
61
# Maps property tokens to their component-specification tokens.
#
# "prop:" becomes "cs:", a <...> URL under options[:base_url] collapses to
# "cs:" plus its last path segment, and any remaining <http://...> URL is
# re-rooted under "<base_url>/dc/dataset/<var>/cs/".
#
# @param args [String, Array<String>] one token or a (possibly nested) list
# @param var [String] dataset name used in the re-rooted URL
# @param options [Hash] only :base_url is read
# @return [Array<String>]
def component_gen(args, var, options={})
  base = options[:base_url]
  local_url = %r{<#{base}/.+/(\w.+)>$}
  rerooted = "<#{base}/dc/dataset/#{var}/cs/" + '\1' + '>'

  [args].flatten.map do |token|
    shortened = token.gsub("prop:", "cs:").gsub(local_url, 'cs:\1')
    shortened.gsub(%r{<http://(.+)>}, rerooted)
  end
end
66
+
67
# Returns a copy of +data+ in which the values of coded columns are turned
# into resource tokens; all other columns are passed through as they are.
#
# A coded value that is an http:// URL is wrapped in <>, a prefix:local value
# is kept, and anything else becomes "<code/column/value>" with a lower-cased
# column name and the value passed through sanitize.
#
# @param codes [Array] coded column names (run through sanitize first)
# @param data [Hash] column name => list of values
# @param var [Object] not read here
# @param options [Hash] not read here
# @return [Hash] a new hash with the same keys
def encode_data(codes, data, var, options={})
  coded_columns = sanitize(codes)

  data.each_with_object({}) do |(column, values), encoded|
    encoded[column] =
      if coded_columns.include?(column)
        values.map do |value|
          if value =~ /^http:\/\//
            "<#{value}>"
          elsif value =~ /^[a-zA-z]+:[a-zA-z]+$/
            value
          else
            "<code/#{column.downcase}/#{sanitize(value).first}>"
          end
        end
      else
        values
      end
  end
end
87
+
88
# Resolves a vocabulary from a URL or from the name of a constant under RDF.
#
# @param vocab [String, Symbol] an http:// URL, or a constant name such as "FOAF"
# @param options [Hash] not read here
# @return [Object, nil] a new RDF::Vocabulary for a URL, the RDF constant when
#   its inspect output starts with "RDF::Vocabulary", otherwise nil
def vocabulary(vocab, options={})
  return RDF::Vocabulary.new(vocab) if vocab.is_a?(String) && vocab =~ /^http:\/\//

  # The original call had no parentheses, so `const_defined?` received the
  # result of the whole `&&` expression and raised TypeError.
  defined_under_rdf =
    begin
      RDF.const_defined?(vocab.to_sym)
    rescue NameError
      # Not a valid constant name at all, so it cannot be a vocabulary.
      false
    end
  return nil unless defined_under_rdf

  candidate = RDF.const_get(vocab)
  candidate.inspect =~ /^RDF::Vocabulary/ ? candidate : nil
end
97
+
98
# Assembles the complete output by concatenating, in order: prefixes, the
# data structure definition, the dataset, component specifications, dimension
# properties, measure properties, code lists, concept codes and observations.
#
# Only +var+ (via sanitize) and +data+ (via sanitize_hash) are cleaned here;
# measures, dimensions and codes are forwarded exactly as given.
#
# @return [String] the concatenated sections
def generate(measures, dimensions, codes, data, observation_labels, var, options={})
  var = sanitize([var]).first
  data = sanitize_hash(data)

  output = prefixes(var, options)
  output << data_structure_definition(measures, dimensions, codes, var, options)
  output << dataset(var, options)

  component_specifications(measures, dimensions, codes, var, options).each { |spec| output << spec }
  dimension_properties(dimensions, codes, var, options).each { |property| output << property }
  measure_properties(measures, var, options).each { |property| output << property }
  code_lists(codes, data, var, options).each { |list| output << list }
  concept_codes(codes, data, var, options).each { |concept| output << concept }
  observations(measures, dimensions, codes, data, observation_labels, var, options).each do |observation|
    output << observation
  end

  output
end
116
+
117
# Builds the @base / @prefix header for a dataset.
#
# @param var [String] dataset name (run through sanitize)
# @param options [Hash] merged over defaults; only :base_url is read
# @return [String] one declaration per line, followed by a blank line
def prefixes(var, options={})
  name = sanitize([var]).first
  base = defaults.merge(options)[:base_url]
  dataset_root = "#{base}/dc/dataset/#{name}/"

  declarations = [
    "@base <#{dataset_root}> .",
    "@prefix ns: <#{dataset_root}> .",
    "@prefix qb: <http://purl.org/linked-data/cube#> .",
    "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .",
    "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .",
    "@prefix prop: <#{base}/properties/> .",
    "@prefix dct: <http://purl.org/dc/terms/> .",
    "@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .",
    "@prefix cs: <#{dataset_root}cs/> .",
    "@prefix code: <#{dataset_root}code/> .",
    "@prefix owl: <http://www.w3.org/2002/07/owl#> .",
    "@prefix skos: <http://www.w3.org/2004/02/skos/core#> .",
    "@prefix foaf: <http://xmlns.com/foaf/0.1/> .",
    "@prefix org: <http://www.w3.org/ns/org#> .",
    "@prefix prov: <http://www.w3.org/ns/prov#> ."
  ]

  declarations.join("\n") << "\n\n"
end
140
+
141
# Builds the qb:DataStructureDefinition entry, listing one qb:component per
# dimension followed by one per measure.
#
# @param var [String] dataset name (run through sanitize)
# @param options [Hash] merged over defaults
# @return [String] the definition terminated by "." and a blank line
def data_structure_definition(measures,dimensions,codes,var,options={})
  var = sanitize([var]).first
  options = defaults.merge(options)
  rdf_measures, rdf_dimensions, = generate_resources(measures, dimensions, codes, options)

  components = component_gen(rdf_dimensions, var, options) + component_gen(rdf_measures, var, options)
  component_lines = components.map { |component| " qb:component #{component} ;\n" }.join

  definition = "ns:dsd-#{var} a qb:DataStructureDefinition;\n" + component_lines
  # The text always ends in ";\n" at this point; turn that last ";" into ".".
  definition[-2] = '.'
  definition << "\n"
end
159
+
160
# Builds the qb:DataSet entry that ties the dataset to its structure
# definition.
#
# @param var [String] dataset name (run through sanitize)
# @param options [Hash] merged over defaults; no option is read afterwards
# @return [String] the entry followed by a blank line
def dataset(var,options={})
  name = sanitize([var]).first
  defaults.merge(options)

  [
    "ns:dataset-#{name} a qb:DataSet ;",
    "rdfs:label \"#{name}\"@en ;",
    "qb:structure ns:dsd-#{name} ."
  ].join("\n") << "\n\n"
end
170
+
171
# Builds one qb:ComponentSpecification entry per dimension, then one per
# measure. Labels come from the names as passed in, with URI and prefix
# stripped.
#
# @param options [Hash] merged over defaults
# @return [Array<String>] dimension entries followed by measure entries
def component_specifications(measure_names, dimension_names, codes, var, options={})
  options = defaults.merge(options)
  rdf_measures, rdf_dimensions, = generate_resources(measure_names, dimension_names, codes, options)
  cs_dims = component_gen(rdf_dimensions, var, options)
  cs_meas = component_gen(rdf_measures, var, options)

  dimension_specs = rdf_dimensions.each_with_index.map do |resource, i|
    label = strip_prefixes(strip_uri(dimension_names[i]))
    "#{cs_dims[i]} a qb:ComponentSpecification ;\n" \
      "rdfs:label \"#{label}\" ;\n" \
      "qb:dimension #{resource} .\n\n"
  end

  measure_specs = rdf_measures.each_with_index.map do |resource, i|
    label = strip_prefixes(strip_uri(measure_names[i]))
    "#{cs_meas[i]} a qb:ComponentSpecification ;\n" \
      "rdfs:label \"#{label}\" ;\n" \
      "qb:measure #{resource} .\n\n"
  end

  dimension_specs + measure_specs
end
198
+
199
# Builds one qb:DimensionProperty entry per dimension.
#
# A dimension that has a code entry gets qb:codeList and rdfs:range from that
# entry. Otherwise rdfs:range is taken from options[:ranges] when present,
# and left out when not.
#
# @param dimensions [Array<String>] dimension names
# @param codes [Array] code names or [name, code list, class] triples
# @param var [Object] not read here
# @param options [Hash] merged over defaults; :ranges maps name => range
# @return [Array<String>] one entry per dimension
def dimension_properties(dimensions, codes, var, options={})
  options = defaults.merge(options)
  _measures, rdf_dimensions, rdf_codes = generate_resources([], dimensions, codes, options)

  # Names of the coded dimensions, with the <> wrapper removed from URLs so
  # they can be compared with the names in +dimensions+.
  dimension_codes = rdf_codes.map do |code|
    code[0] =~ /^<http:/ ? code[0][1..-2] : code[0]
  end

  rdf_dimensions.each_with_index.map do |resource, i|
    name = dimensions[i]
    property = "#{resource} a rdf:Property, qb:DimensionProperty ;\n" \
      "rdfs:label \"#{strip_prefixes(strip_uri(resource))}\"@en ;\n"

    code_index = dimension_codes.index(name)
    if code_index
      code = rdf_codes[code_index]
      property << "qb:codeList #{code[1]} ;\nrdfs:range #{code[2]} .\n\n"
    elsif options[:ranges] && options[:ranges][name]
      # Was `dimension[i]`, an undefined name that raised NameError whenever
      # ranges were supplied.
      property << "\n rdfs:range #{options[:ranges][name]} .\n\n"
    else
      # No range: close the entry by replacing the trailing ";".
      property[-2] = ".\n"
    end

    property
  end
end
238
+
239
# Builds one qb:MeasureProperty entry per measure, adding rdfs:range when
# options[:ranges] has an entry for the measure name.
#
# @param measures [Array<String>] measure names
# @param var [Object] not read here
# @param options [Hash] merged over defaults; :ranges maps name => range
# @return [Array<String>] one entry per measure
def measure_properties(measures, var, options={})
  options = defaults.merge(options)
  rdf_measures = generate_resources(measures, [], [], options).first
  ranges = options[:ranges]

  rdf_measures.each_with_index.map do |resource, i|
    property = "#{resource} a rdf:Property, qb:MeasureProperty ;\n" \
      "rdfs:label \"#{strip_prefixes(strip_uri(resource))}\"@en ;\n"

    if ranges && ranges[measures[i]]
      property << " rdfs:range #{ranges[measures[i]]} .\n\n"
    else
      # No range: close the entry by replacing the trailing ";".
      property[-2] = ".\n"
    end

    property
  end
end
260
+
261
# Builds one qb:Observation entry per observation label.
#
# Each entry lists the dimension values followed by the measure values found
# at the label's position in every column. nil values are skipped unless
# options[:encode_nulls] is set. Values for which is_complex? is true are
# referenced through add_node and their encoded form is appended after the
# entry.
#
# @param measures [Array<String>] measure names (run through sanitize)
# @param dimensions [Array<String>] dimension names (run through sanitize)
# @param codes [Array] coded dimensions, forwarded to the helpers
# @param data [Hash] column name => value or list of values; not modified
# @param observation_labels [Array, Object] one label per observation
# @param var [String] dataset name (run through sanitize)
# @param options [Hash] merged over defaults; :no_labels and :encode_nulls
#   are read here
# @return [Array<String>] one entry per label
def observations(measures, dimensions, codes, data, observation_labels, var, options={})
  var = sanitize([var]).first
  measures = sanitize(measures)
  dimensions = sanitize(dimensions)

  # Wrap every column in an Array on a copy; the original version rewrote the
  # caller's hash in place.
  columns = data.each_with_object({}) { |(key, values), copy| copy[key] = Array(values) }
  observation_labels = Array(observation_labels)
  options = defaults.merge(options)

  rdf_measures, rdf_dimensions, = generate_resources(measures, dimensions, codes, options)
  columns = encode_data(codes, columns, var, options)

  observation_labels.each_with_index.map do |label, row|
    str = "ns:obs#{label} a qb:Observation ;\nqb:dataSet ns:dataset-#{var} ;\n"
    str << " rdfs:label \"#{label}\" ;\n" unless options[:no_labels]

    # Position of the component within the observation; it advances even for
    # skipped values, and is the first argument given to add_node.
    obs_index = 0
    obs_nodes = []

    dimensions.each_with_index do |dimension, j|
      value = columns[dimension][row]

      if !value.nil? || options[:encode_nulls]
        if is_complex?(value)
          str << " #{rdf_dimensions[j]} #{add_node(obs_index,add_node(label))} ;\n"
          obs_nodes << encode_value(value, options, obs_index, add_node(label))
        else
          str << " #{rdf_dimensions[j]} #{encode_value(value, options)} ;\n"
        end
      end

      obs_index += 1
    end

    measures.each_with_index do |measure, j|
      value = columns[measure][row]

      if !value.nil? || options[:encode_nulls]
        if is_complex?(value)
          str << " #{rdf_measures[j]} #{add_node(obs_index,add_node(label))} ;\n"
          node = encode_value(value, options, obs_index, add_node(label))

          # Make sure the last nested statement is terminated.
          if node.last.is_a?(Array) && node.last.last[-2] != "."
            node.last.last << ".\n"
          end

          obs_nodes << node
        else
          str << " #{rdf_measures[j]} #{encode_value(value, options)} ;\n"
        end
      end

      obs_index += 1
    end

    str << " .\n\n"

    unless obs_nodes.empty?
      str << turtle_indent(obs_nodes.flatten.join("\n"))
      str << " \n\n"
    end

    str
  end
end
345
+
346
# Builds, for every code entry, the code list class together with its
# skos:ConceptScheme, listing each distinct value of the coded column as a
# top concept. nil values are left out unless options[:encode_nulls] is set.
#
# @param codes [Array] code names or [name, code list, class] triples
# @param data [Hash] column name => list of values
# @param options [Hash] merged over defaults
# @return [Array<String>] one entry per code
def code_lists(codes, data, var, options={})
  options = defaults.merge(options)
  _measures, _dimensions, rdf_codes = generate_resources([], [], codes, options)
  data = encode_data(codes, data, var, options)

  rdf_codes.map do |code|
    # Column name under which the values are stored: the code's first element
    # without a surrounding <> pair.
    column = code[0] =~ /^<.+>$/ ? code[0][1..-2] : code[0]
    list_name = strip_prefixes(strip_uri(code[1]))

    list = "#{code[2]} a rdfs:Class, owl:Class;\n" \
      "rdfs:subClassOf skos:Concept ;\n" \
      "rdfs:label \"Code list for #{list_name} - codelist class\"@en;\n" \
      "rdfs:comment \"Specifies the #{list_name} for each observation\";\n" \
      "rdfs:seeAlso #{code[1]} .\n" \
      "\n" \
      "#{code[1]} a skos:ConceptScheme;\n" \
      "skos:prefLabel \"Code list for #{list_name} - codelist scheme\"@en;\n" \
      "rdfs:label \"Code list for #{list_name} - codelist scheme\"@en;\n" \
      "skos:notation \"CL_#{list_name.upcase}\";\n" \
      "skos:note \"Specifies the #{list_name} for each observation\";\n"

    data[column].uniq.each do |value|
      next if value.nil? && !options[:encode_nulls]

      list << " skos:hasTopConcept #{to_resource(value,options)} ;\n"
    end

    list << " .\n\n"
  end
end
382
+
383
# Builds one skos:Concept entry for every distinct value of every coded
# column. nil values are left out unless options[:encode_nulls] is set.
#
# @param codes [Array] code names or [name, code list, class] triples
# @param data [Hash] column name => list of values
# @param options [Hash] merged over defaults
# @return [Array<String>] entries grouped by code, in column order
def concept_codes(codes, data, var, options={})
  options = defaults.merge(options)
  _measures, _dimensions, rdf_codes = generate_resources([], [], codes, options)
  data = encode_data(codes, data, var, options)

  rdf_codes.flat_map do |code|
    # Column name: the code's first element without a surrounding <> pair.
    column = code[0] =~ /^<.+>$/ ? code[0][1..-2] : code[0]

    kept = data[column].uniq.reject { |value| value.nil? && !options[:encode_nulls] }
    kept.map do |value|
      "#{to_resource(value,options)} a skos:Concept, #{code[2]};\n" \
        "skos:topConceptOf #{code[1]} ;\n" \
        "skos:prefLabel \"#{strip_uri(value)}\" ;\n" \
        "skos:inScheme #{code[1]} .\n\n"
    end
  end
end
409
+
410
+
411
# Debug helper: shortens full www.rqtl.org URIs in a Turtle string to their
# prefixed or relative forms.
#
# The dots of the host name are escaped in every pattern; three of the four
# used a bare ".", which matches any character.
#
# @param turtle_string [String] text to abbreviate
# @return [String] a new string with the known URIs shortened
def abbreviate_known(turtle_string)
  turtle_string
    .gsub(%r{<http://www\.rqtl\.org/dc/properties/(\S+)>}, 'prop:\1')
    .gsub(%r{<http://www\.rqtl\.org/ns/dc/code/(\S+)/(\S+)>}, '<code/\1/\2>')
    .gsub(%r{<http://www\.rqtl\.org/dc/dataset/(\S+)/code/(\w+)>}, 'code:\2')
    .gsub(%r{<http://www\.rqtl\.org/dc/dataset/(\S+)/code/(\S+)>}, '<code/\2>')
end
416
+ end
417
+ end
418
+ end