publisci 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +13 -0
  5. data/Gemfile +36 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +51 -0
  8. data/README.rdoc +48 -0
  9. data/Rakefile +68 -0
  10. data/bin/bio-publisci +106 -0
  11. data/bin/bio-publisci-server +50 -0
  12. data/examples/bio-band_integration.rb +9 -0
  13. data/examples/no_magic.prov +58 -0
  14. data/examples/no_magic.rb +58 -0
  15. data/examples/orm.prov +48 -0
  16. data/examples/primer-full.prov +120 -0
  17. data/examples/primer.prov +66 -0
  18. data/examples/prov_dsl.prov +85 -0
  19. data/examples/safe_gen.rb +7 -0
  20. data/examples/visualization/primer.prov +66 -0
  21. data/examples/visualization/prov_viz.rb +140 -0
  22. data/examples/visualization/viz.rb +35 -0
  23. data/features/create_generator.feature +21 -0
  24. data/features/integration.feature +12 -0
  25. data/features/integration_steps.rb +10 -0
  26. data/features/metadata.feature +37 -0
  27. data/features/metadata_steps.rb +40 -0
  28. data/features/orm.feature +60 -0
  29. data/features/orm_steps.rb +74 -0
  30. data/features/prov_dsl.feature +14 -0
  31. data/features/prov_dsl_steps.rb +11 -0
  32. data/features/reader.feature +25 -0
  33. data/features/reader_steps.rb +61 -0
  34. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  35. data/features/store.feature +27 -0
  36. data/features/store_steps.rb +42 -0
  37. data/features/support/env.rb +13 -0
  38. data/features/writer.feature +14 -0
  39. data/features/writer_steps.rb +24 -0
  40. data/lib/bio-publisci.rb +64 -0
  41. data/lib/bio-publisci/analyzer.rb +57 -0
  42. data/lib/bio-publisci/datacube_model.rb +111 -0
  43. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
  44. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  45. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  46. data/lib/bio-publisci/dataset/data_cube.rb +418 -0
  47. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  48. data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
  49. data/lib/bio-publisci/dataset/interactive.rb +72 -0
  50. data/lib/bio-publisci/dsl/config.rb +34 -0
  51. data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
  52. data/lib/bio-publisci/dsl/dsl.rb +72 -0
  53. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  54. data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
  55. data/lib/bio-publisci/metadata/generator.rb +323 -0
  56. data/lib/bio-publisci/metadata/metadata.rb +5 -0
  57. data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
  58. data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
  59. data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
  60. data/lib/bio-publisci/metadata/prov/association.rb +107 -0
  61. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  62. data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
  63. data/lib/bio-publisci/metadata/prov/element.rb +120 -0
  64. data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
  65. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
  66. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  67. data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
  68. data/lib/bio-publisci/metadata/prov/role.rb +40 -0
  69. data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
  70. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  71. data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
  72. data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
  73. data/lib/bio-publisci/mixins/registry.rb +27 -0
  74. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  75. data/lib/bio-publisci/output.rb +27 -0
  76. data/lib/bio-publisci/parser.rb +266 -0
  77. data/lib/bio-publisci/post_processor.rb +95 -0
  78. data/lib/bio-publisci/query/query_helper.rb +123 -0
  79. data/lib/bio-publisci/r_client.rb +54 -0
  80. data/lib/bio-publisci/readers/arff.rb +49 -0
  81. data/lib/bio-publisci/readers/base.rb +57 -0
  82. data/lib/bio-publisci/readers/csv.rb +88 -0
  83. data/lib/bio-publisci/readers/dataframe.rb +67 -0
  84. data/lib/bio-publisci/readers/maf.rb +199 -0
  85. data/lib/bio-publisci/readers/r_cross.rb +112 -0
  86. data/lib/bio-publisci/readers/r_matrix.rb +176 -0
  87. data/lib/bio-publisci/store.rb +56 -0
  88. data/lib/bio-publisci/writers/arff.rb +91 -0
  89. data/lib/bio-publisci/writers/base.rb +93 -0
  90. data/lib/bio-publisci/writers/csv.rb +31 -0
  91. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  92. data/lib/bio-publisci/writers/json.rb +18 -0
  93. data/lib/r2rdf.rb +226 -0
  94. data/lib/template_bak.rb +12 -0
  95. data/lib/template_bak/publisci.rb +3 -0
  96. data/lib/vocabs/cc.rb +18 -0
  97. data/lib/vocabs/cert.rb +13 -0
  98. data/lib/vocabs/dc.rb +63 -0
  99. data/lib/vocabs/dc11.rb +23 -0
  100. data/lib/vocabs/doap.rb +45 -0
  101. data/lib/vocabs/exif.rb +168 -0
  102. data/lib/vocabs/foaf.rb +69 -0
  103. data/lib/vocabs/geo.rb +13 -0
  104. data/lib/vocabs/http.rb +26 -0
  105. data/lib/vocabs/ma.rb +78 -0
  106. data/lib/vocabs/owl.rb +59 -0
  107. data/lib/vocabs/rdfs.rb +17 -0
  108. data/lib/vocabs/rsa.rb +12 -0
  109. data/lib/vocabs/rss.rb +14 -0
  110. data/lib/vocabs/sioc.rb +93 -0
  111. data/lib/vocabs/skos.rb +36 -0
  112. data/lib/vocabs/wot.rb +21 -0
  113. data/lib/vocabs/xhtml.rb +9 -0
  114. data/lib/vocabs/xsd.rb +58 -0
  115. data/resources/maf_example.maf +10 -0
  116. data/resources/maf_rdf.ttl +1173 -0
  117. data/resources/primer.ttl +38 -0
  118. data/resources/queries/code_resources.rq +10 -0
  119. data/resources/queries/codes.rq +18 -0
  120. data/resources/queries/dataset.rq +7 -0
  121. data/resources/queries/dimension_ranges.rq +8 -0
  122. data/resources/queries/dimensions.rq +12 -0
  123. data/resources/queries/gene.rq +16 -0
  124. data/resources/queries/hugo_to_ensembl.rq +7 -0
  125. data/resources/queries/maf_column.rq +26 -0
  126. data/resources/queries/measures.rq +12 -0
  127. data/resources/queries/observation_labels.rq +8 -0
  128. data/resources/queries/observations.rq +13 -0
  129. data/resources/queries/patient.rq +11 -0
  130. data/resources/queries/patient_list.rq +11 -0
  131. data/resources/queries/patients_with_mutation.rq +18 -0
  132. data/resources/queries/properties.rq +8 -0
  133. data/resources/queries/test.rq +3 -0
  134. data/resources/weather.numeric.arff +28 -0
  135. data/scripts/get_gene_lengths.rb +50 -0
  136. data/scripts/islet_mlratio.rb +6 -0
  137. data/scripts/scan_islet.rb +6 -0
  138. data/scripts/update_reference.rb +25 -0
  139. data/server/helpers.rb +215 -0
  140. data/server/public/src-min-noconflict/LICENSE +24 -0
  141. data/server/public/src-min-noconflict/ace.js +11 -0
  142. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  143. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  144. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  145. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  146. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  147. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  148. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  149. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  150. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  151. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  152. data/server/public/src-min-noconflict/ext-split.js +1 -0
  153. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  154. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  155. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  156. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  157. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  158. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  159. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  160. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  161. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  162. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  163. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  164. data/server/public/src-min-noconflict/worker-css.js +1 -0
  165. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  166. data/server/public/src-min-noconflict/worker-json.js +1 -0
  167. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  168. data/server/public/src-min-noconflict/worker-php.js +1 -0
  169. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  170. data/server/routes.rb +123 -0
  171. data/server/views/dsl.haml +65 -0
  172. data/server/views/dump.haml +3 -0
  173. data/server/views/import.haml +35 -0
  174. data/server/views/new_repository.haml +25 -0
  175. data/server/views/query.haml +28 -0
  176. data/server/views/repository.haml +25 -0
  177. data/spec/ORM/data_cube_orm_spec.rb +33 -0
  178. data/spec/ORM/prov_model_spec.rb +72 -0
  179. data/spec/analyzer_spec.rb +36 -0
  180. data/spec/bnode_spec.rb +66 -0
  181. data/spec/csv/bacon.csv +4 -0
  182. data/spec/csv/moar_bacon.csv +11 -0
  183. data/spec/data_cube_spec.rb +169 -0
  184. data/spec/dataset_for_spec.rb +77 -0
  185. data/spec/dsl_spec.rb +134 -0
  186. data/spec/generators/csv_spec.rb +44 -0
  187. data/spec/generators/dataframe_spec.rb +44 -0
  188. data/spec/generators/maf_spec.rb +40 -0
  189. data/spec/generators/r_cross_spec.rb +51 -0
  190. data/spec/generators/r_matrix_spec.rb +44 -0
  191. data/spec/length_lookup_spec.rb +0 -0
  192. data/spec/maf_query_spec.rb +343 -0
  193. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  194. data/spec/prov/activity_spec.rb +74 -0
  195. data/spec/prov/agent_spec.rb +54 -0
  196. data/spec/prov/association_spec.rb +55 -0
  197. data/spec/prov/config_spec.rb +28 -0
  198. data/spec/prov/derivation_spec.rb +30 -0
  199. data/spec/prov/entity_spec.rb +52 -0
  200. data/spec/prov/role_spec.rb +94 -0
  201. data/spec/prov/usage_spec.rb +98 -0
  202. data/spec/queries/integrity/1.rq +21 -0
  203. data/spec/queries/integrity/11.rq +29 -0
  204. data/spec/queries/integrity/12.rq +37 -0
  205. data/spec/queries/integrity/14.rq +25 -0
  206. data/spec/queries/integrity/19_1.rq +21 -0
  207. data/spec/queries/integrity/19_2.rq +15 -0
  208. data/spec/queries/integrity/2.rq +22 -0
  209. data/spec/queries/integrity/3.rq +19 -0
  210. data/spec/queries/integrity/4.rq +13 -0
  211. data/spec/queries/integrity/5.rq +14 -0
  212. data/spec/r_builder_spec.rb +33 -0
  213. data/spec/resource/.RData +0 -0
  214. data/spec/resource/example.Rhistory +3 -0
  215. data/spec/spec_helper.rb +17 -0
  216. data/spec/turtle/bacon +147 -0
  217. data/spec/turtle/reference +2064 -0
  218. data/spec/turtle/weather +275 -0
  219. data/spec/writer_spec.rb +75 -0
  220. metadata +589 -0
@@ -0,0 +1,240 @@
1
+ module PubliSci
2
+ module DataSet
3
+ module ORM
4
+ class DataCube
5
# Class-level mixins: make the shared helpers callable on the class itself
# (self.load relies on them).
extend PubliSci::Dataset::DataCube
extend PubliSci::Analyzer
extend PubliSci::Query
extend PubliSci::Parser

# Instance-level mixins.
include PubliSci::Dataset::DataCube
include PubliSci::Analyzer
include PubliSci::Metadata::Generator
include PubliSci::Query
include PubliSci::Parser

# Public read/write access to the cube's state.
attr_accessor :labels, :dimensions, :measures, :obs, :meta
21
+
22
# Creates an empty cube and, unless +do_parse+ is falsy, populates it from
# +options+ via parse_options.
#
# @param options [Hash] passed unchanged to parse_options
# @param do_parse [Boolean] when falsy, +options+ is ignored entirely
def initialize(options={}, do_parse = true)
  @dimensions, @measures, @obs = {}, [], []
  @generator_options, @options, @meta = {}, {}, {}

  return unless do_parse
  parse_options(options)
end
33
+
34
# Rebuilds a cube from RDF by running the packaged SPARQL query files
# against +graph+ and passing the collected pieces to +new+.
#
# @param graph [String] used as-is when it matches /^http/; anything else
#   is first passed through load_string
# @param options [Hash] extra constructor options; :measures, :dimensions,
#   :observations, :name, :labels and :codes are replaced by the values
#   read from the graph
# @param verbose [Boolean] when truthy, each intermediate result is printed
# @return [DataCube] the newly constructed instance
def self.load(graph, options={}, verbose=false)
  graph = load_string(graph) unless graph =~ /^http/

  range_rows = get_hashes(execute_from_file('dimension_ranges.rq', graph), "to_s")
  dimensions = range_rows.each_with_object({}) do |row, found|
    #TODO coded properties should be found via SPARQL queries
    coded = row[:range].split('/')[-2] == "code"
    found[row[:dimension]] = {type: coded ? :coded : row[:range].to_s}
  end
  puts "dimensions: #{dimensions}" if verbose

  code_rows = execute_from_file('code_resources.rq', graph).to_h
  codes = code_rows.map do |row|
    [:dimension, :codeList, :class].map { |field| row[field].to_s }
  end
  puts "codes: #{codes}" if verbose

  measure_rows = execute_from_file('measures.rq', graph).to_h
  measures = measure_rows.map { |row| row[:measure].to_s }
  puts "measures: #{measures}" if verbose

  name = execute_from_file('dataset.rq', graph).to_h.first[:label]
  puts "dataset: #{name}" if verbose

  observations = observation_hash(execute_from_file('observations.rq', graph))
  puts "observations: #{observations}" if verbose

  label_rows = execute_from_file('observation_labels.rq', graph)
  labels = Hash[label_rows.map { |row| [row[:observation].to_s, row[:label].to_s] }]

  # Values read from the graph take precedence over caller-supplied ones.
  options = options.merge(
    measures: measures,
    dimensions: dimensions,
    observations: observations.values,
    name: name,
    labels: labels.values,
    codes: codes
  )
  puts "creating #{options}" if verbose
  new(options)
end
86
+
87
# Copies the recognised entries of +options+ into the cube.
#
# Dimensions, measures and observations are applied first; :name is
# mandatory and is checked afterwards, so a missing name raises only after
# those entries have already been added.
#
# @param options [Hash] may contain :dimensions, :measures, :observations,
#   :generator_options, :skip_metadata, :name, :validate_each, :labels, :codes
# @raise [RuntimeError] when :name is missing or falsy
def parse_options(options)
  dims = options[:dimensions]
  dims.each { |name, details| add_dimension(name, details[:type] || :coded) } if dims

  listed = options[:measures]
  listed.each { |measure| @measures << measure } if listed

  rows = options[:observations]
  rows.each { |row| add_observation(row) } if rows

  @generator_options = options[:generator_options] if options[:generator_options]
  @options[:skip_metadata] = options[:skip_metadata] if options[:skip_metadata]

  raise "No dataset name specified!" unless options[:name]
  @name = options[:name]

  @options[:validate_each] = options[:validate_each] if options[:validate_each]
  @labels = options[:labels] if options[:labels]
  @codes = options[:codes] if options[:codes]
end
123
+
124
# Serialises the cube by delegating to +generate+ and, unless
# @options[:skip_metadata] is set, appending the output of +basic+ built
# from the metadata accessors.
#
# @return [String] the generated document
def to_n3
  # Labels default to "1", "2", ... when they are not an Array matching the
  # observation count. A Symbol is left untouched (placeholder for automatic
  # labeling methods).
  labels_usable = @labels.is_a?(Array) && @labels.size == @obs.size
  @labels = (1..@obs.size).map(&:to_s) unless labels_usable || @labels.is_a?(Symbol)

  check_integrity(@obs.map { |o| o.data }, @dimensions.keys, @measures)

  # Pivot the row-wise observations into one array per component.
  data = {}
  @obs.each do |observation|
    (@measures | @dimensions.keys).each do |component|
      (data[component] ||= []) << observation.data[component]
    end
  end

  # Without explicit codes, every dimension typed :coded becomes one.
  @codes ||= @dimensions.map { |dim, info| info[:type] == :coded ? dim : nil }.compact

  turtle = generate(@measures, @dimensions.keys, @codes, data, @labels, @name, @generator_options)
  return turtle if @options[:skip_metadata]

  fields = {
    publishers: publishers,
    subject: subjects,
    author: author,
    description: description,
    date: date,
    var: @name,
  }
  turtle + ("\n" + basic(fields))
end
162
+
163
# Registers a dimension under its string name.
#
# @param name [#to_s] dimension name
# @param type [Object] stored under :type; defaults to :coded
# @return [Hash] the stored entry
def add_dimension(name, type=:coded)
  @dimensions.store(name.to_s, {type: type})
end
166
+
167
# Appends +name+ to the list of measures.
#
# @return [Array] the measures list
def add_measure(name)
  @measures.push(name)
end
170
+
171
# Wraps +data+ (keys converted to strings) in an Observation and stores it.
# When @options[:validate_each] is set, the new observation alone is passed
# to check_integrity before being stored.
#
# @param data [Hash] component name => value
# @return [Array] the observations list
def add_observation(data)
  stringified = {}
  data.each { |key, value| stringified[key.to_s] = value }

  observation = Observation.new(stringified)
  if @options[:validate_each]
    check_integrity([observation.data], @dimensions.keys, @measures)
  end
  @obs.push(observation)
end
177
+
178
# Stores an already-built observation object without any conversion or
# validation.
#
# @return [Array] the observations list
def insert(observation)
  @obs.push(observation)
end
181
+
182
# Publisher list kept in @meta[:publishers]; an empty array is created on
# first access.
def publishers
  @meta[:publishers] = [] unless @meta[:publishers]
  @meta[:publishers]
end
185
+
186
# Replaces the publisher list stored in @meta[:publishers].
def publishers=(publishers)
  @meta.store(:publishers, publishers)
end
189
+
190
# Subject list kept in @meta[:subject]; an empty array is created on first
# access.
def subjects
  @meta[:subject] || (@meta[:subject] = [])
end
193
+
194
# Replaces the subject list stored in @meta[:subject].
def subjects=(subjects)
  @meta.store(:subject, subjects)
end
197
+
198
# Appends a {label:, uri:} entry to the publisher list.
#
# @return [Array] the publisher list
def add_publisher(label, uri)
  entry = {label: label, uri: uri}
  publishers.push(entry)
end
201
+
202
# Appends +id+ to the subject list.
#
# The previous body called an undefined +subject+ and therefore always
# raised NameError; the list accessor is +subjects+.
#
# @return [Array] the subject list
def add_subject(id)
  subjects << id
end
205
+
206
# Author kept in @meta[:creator]; an empty string is stored on first access.
def author
  @meta[:creator] = "" unless @meta[:creator]
  @meta[:creator]
end
209
+
210
# Stores the author under @meta[:creator].
def author=(author)
  @meta.store(:creator, author)
end
213
+
214
# Description kept in @meta[:description]; an empty string is stored on
# first access.
def description
  @meta[:description] || (@meta[:description] = "")
end
217
+
218
# Stores the description under @meta[:description].
def description=(description)
  @meta.store(:description, description)
end
221
+
222
# Date kept in @meta[:date]. On first access it defaults to today's date
# formatted as "day-month-year" without zero padding.
#
# The clock is read once so that the three parts always belong to the same
# instant (three separate Time.now calls could straddle midnight).
def date
  @meta[:date] ||= begin
    now = Time.now
    "#{now.day}-#{now.month}-#{now.year}"
  end
end
225
+
226
# Stores the date under @meta[:date].
def date=(date)
  @meta.store(:date, date)
end
229
+
230
# Plain-hash snapshot of the cube: measures, dimensions and the data hash
# of every observation.
def to_h
  rows = @obs.map(&:data)
  {measures: @measures, dimensions: @dimensions, observations: rows}
end
237
+ end
238
+ end
239
+ end
240
+ end
@@ -0,0 +1,20 @@
1
+ module PubliSci
2
+ module DataSet
3
+ module ORM
4
# A single observation: a thin wrapper around a hash of component values.
#
# Entries of the data hash can also be read as methods (obs.chunkiness),
# whether the key is stored as a String or a Symbol. Unknown names, or
# calls that pass arguments, fall through to the normal NoMethodError.
class Observation
  attr_accessor :data

  # @param data [Hash] component name => value
  def initialize(data={})
    @data = data
  end

  # Returns the entry of the data hash named after the missing method.
  def method_missing(name, *args, &block)
    key = data_key_for(name)
    return super if key.nil? || !args.empty?
    @data[key]
  end

  # Keeps respond_to? consistent with method_missing.
  def respond_to_missing?(method, include_private = false)
    !data_key_for(method).nil? || super
  end

  private

  # Finds the key under which +name+ is stored (String first, then Symbol),
  # or nil when there is none or data is not hash-like.
  def data_key_for(name)
    return nil unless @data.respond_to?(:key?)
    [name.to_s, name.to_sym].find { |candidate| @data.key?(candidate) }
  end
end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,31 @@
1
+ module PubliSci
2
+ class Dataset
3
# Per-dataset settings. For every key in Configuration.defaults this class
# defines:
#
# * a reader that doubles as a setter when called with an argument
#   (config.interactive true), and
# * a plain attribute writer (config.interactive = true).
class Configuration
  # @return [Hash] setting name => default value
  def self.defaults
    {
      interactive: false,
    }
  end

  defaults.each do |key, default|
    ivar = :"@#{key}"

    define_method(key) do |input=nil|
      # Only a missing (nil) value falls back to the default; a stored
      # false is a legitimate setting and must survive.
      instance_variable_set(ivar, default) if instance_variable_get(ivar).nil?

      # nil means "no argument given"; false is accepted so that a setting
      # can be switched off through the call form as well.
      instance_variable_set(ivar, input) unless input.nil?

      instance_variable_get(ivar)
    end

    attr_writer key
  end
end
30
+ end
31
+ end
@@ -0,0 +1,418 @@
1
+ #monkey patch to make rdf string w/ heredocs prettier ;)
2
class String
  # Removes the string's own leading whitespace run from the start of every
  # line, so indented heredocs can be written inline.
  def unindent
    margin = self[/\A\s*/]
    gsub(/^#{margin}/, '')
  end
end
7
+
8
+ module PubliSci
9
+ class Dataset
10
+ module DataCube
11
+ include PubliSci::Parser
12
# Option values used when the caller does not override them.
#
# @return [Hash] a fresh hash on every call
def defaults
  settings = {}
  settings[:type] = :dataframe
  settings[:encode_nulls] = false
  settings[:base_url] = "http://onto.strinz.me"
  settings
end
19
+
20
# Converts measure, dimension and code names into the terms written to the
# Turtle output.
#
# Measures and dimensions follow one rule: "http://" URIs are wrapped in
# angle brackets, names shaped like prefix:local pass through, and
# everything else is placed in the prop: namespace.
#
# Codes are either triples (arrays), whose "http://" members are wrapped
# in angle brackets, or bare names, which expand to
# [name, "code:name", "code:Name"] after sanitize.
#
# @param measures [Array<String>]
# @param dimensions [Array<String>]
# @param codes [Array<String>, Array<Array<String>>]
# @param options [Hash] accepted for signature parity; not read here
# @return [Array(Array, Array, Array)] measures, dimensions, codes
def generate_resources(measures, dimensions, codes, options={})
  # The class used to read [a-zA-z], which also admits [ \ ] ^ and `;
  # underscore is kept because the old range accepted it too.
  to_property = lambda do |name|
    if name =~ /^http:\/\//
      "<#{name}>"
    elsif name =~ /^[a-zA-Z_]+:[a-zA-Z_]+$/
      name
    else
      "prop:#{name}"
    end
  end

  newm = measures.map(&to_property)
  newd = dimensions.map(&to_property)

  newc =
    if codes.first.is_a? Array
      codes.map { |c| c.map { |el| el =~ /^http:\/\// ? "<#{el}>" : el } }
    else
      codes.map do |c|
        label = sanitize(c).first
        ["#{label}", "code:#{label.downcase}", "code:#{label.downcase.capitalize}"]
      end
    end

  [newm, newd, newc]
end
60
+
61
# Maps property terms to their component-specification terms.
#
# * "prop:" becomes "cs:".
# * A bracketed URI under options[:base_url] is shortened to cs:<last part>.
# * Any other bracketed http URI is re-rooted under
#   <base_url>/dc/dataset/<var>/cs/.
#
# The base URL is escaped before being embedded in the pattern, so its dots
# (and any other metacharacters) match literally.
#
# @param args [String, Array<String>] one term or a list of terms
# @param var [String] dataset name used in the rewritten URI
# @param options [Hash] reads :base_url
# @return [Array<String>]
def component_gen(args, var, options={})
  base = Regexp.escape(options[:base_url].to_s)

  [args].flatten.map do |arg|
    arg.gsub("prop:", "cs:")
       .gsub(%r{<#{base}/.+/(\w.+)>$}, 'cs:\1')
       .gsub(%r{<http://(.+)>}, "<#{options[:base_url]}/dc/dataset/#{var}/cs/\\1>")
  end
end
66
+
67
# Returns a copy of +data+ in which the values of coded components are
# turned into resource terms; other components are passed through.
#
# For a coded component, "http://" values are wrapped in angle brackets,
# values shaped like prefix:local are kept, and everything else becomes
# <code/<component>/<sanitized value>>.
#
# @param codes [Array<String>] coded component names (run through sanitize)
# @param data [Hash{String => Array}] component name => values
# @param var [String] unused here
# @param options [Hash] unused here
# @return [Hash{String => Array}] a new hash; +data+ itself is not modified
def encode_data(codes, data, var, options={})
  codes = sanitize(codes)

  data.each_with_object({}) do |(key, values), encoded|
    encoded[key] =
      if codes.include?(key)
        values.map do |val|
          if val =~ /^http:\/\//
            "<#{val}>"
          # Was [a-zA-z], which also admits [ \ ] ^ and `.
          elsif val =~ /^[a-zA-Z_]+:[a-zA-Z_]+$/
            val
          else
            "<code/#{key.downcase}/#{sanitize(val).first}>"
          end
        end
      else
        values
      end
  end
end
87
+
88
# Resolves +vocab+ to an RDF vocabulary.
#
# * An "http://" string yields a new RDF::Vocabulary for that URI.
# * Otherwise +vocab+ is treated as the name of a constant under RDF and is
#   returned when that constant's inspect output starts with
#   "RDF::Vocabulary".
# * Anything else yields nil.
#
# The previous condition lacked parentheses, so const_defined? received the
# result of the whole && expression instead of the constant name.
#
# @param vocab [String, Symbol]
# @param options [Hash] unused
# @return [Object, nil]
def vocabulary(vocab, options={})
  return RDF::Vocabulary.new(vocab) if vocab.is_a?(String) && vocab =~ /^http:\/\//

  name = vocab.to_s
  # const_defined? raises NameError for strings that are not valid constant
  # names, so screen them out first.
  return nil unless name =~ /\A[A-Z]\w*\z/
  return nil unless RDF.const_defined?(name)

  candidate = RDF.const_get(name)
  candidate.inspect =~ /^RDF::Vocabulary/ ? candidate : nil
end
97
+
98
# Assembles the complete document by concatenating, in order: prefixes,
# data structure definition, dataset, component specifications, dimension
# properties, measure properties, code lists, concept codes and
# observations.
#
# +var+ is passed through sanitize and +data+ through sanitize_hash before
# any section is generated.
#
# @return [String]
def generate(measures, dimensions, codes, data, observation_labels, var, options={})
  var = sanitize([var]).first
  data = sanitize_hash(data)

  turtle = prefixes(var, options)
  turtle << data_structure_definition(measures, dimensions, codes, var, options)
  turtle << dataset(var, options)

  component_specifications(measures, dimensions, codes, var, options).each { |spec| turtle << spec }
  dimension_properties(dimensions, codes, var, options).each { |prop| turtle << prop }
  measure_properties(measures, var, options).each { |prop| turtle << prop }
  code_lists(codes, data, var, options).each { |list| turtle << list }
  concept_codes(codes, data, var, options).each { |concept| turtle << concept }
  observations(measures, dimensions, codes, data, observation_labels, var, options).each do |observation|
    turtle << observation
  end

  turtle
end
116
+
117
# Emits the @base line and the @prefix declarations, followed by a blank
# line.
#
# @param var [String] dataset name (run through sanitize)
# @param options [Hash] merged over defaults; :base_url is read
# @return [String]
def prefixes(var, options={})
  name = sanitize([var]).first
  base = defaults.merge(options)[:base_url]
  dataset_uri = "#{base}/dc/dataset/#{name}/"

  namespaces = [
    ["ns", dataset_uri],
    ["qb", "http://purl.org/linked-data/cube#"],
    ["rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"],
    ["rdfs", "http://www.w3.org/2000/01/rdf-schema#"],
    ["prop", "#{base}/properties/"],
    ["dct", "http://purl.org/dc/terms/"],
    ["xsd", "http://www.w3.org/2001/XMLSchema#"],
    ["cs", "#{dataset_uri}cs/"],
    ["code", "#{dataset_uri}code/"],
    ["owl", "http://www.w3.org/2002/07/owl#"],
    ["skos", "http://www.w3.org/2004/02/skos/core#"],
    ["foaf", "http://xmlns.com/foaf/0.1/"],
    ["org", "http://www.w3.org/ns/org#"],
    ["prov", "http://www.w3.org/ns/prov#"]
  ]

  lines = ["@base <#{dataset_uri}> ."]
  namespaces.each { |prefix, uri| lines << "@prefix #{prefix}: <#{uri}> ." }
  lines.join("\n") + "\n\n"
end
140
+
141
# Emits the qb:DataStructureDefinition block listing one qb:component per
# dimension, followed by one per measure.
#
# @param var [String] dataset name (run through sanitize)
# @param options [Hash] merged over defaults
# @return [String] terminated by "." and a blank line
def data_structure_definition(measures, dimensions, codes, var, options={})
  name = sanitize([var]).first
  settings = defaults.merge(options)
  rdf_measures, rdf_dimensions, _rdf_codes = generate_resources(measures, dimensions, codes, settings)
  dimension_components = component_gen(rdf_dimensions, name, settings)
  measure_components = component_gen(rdf_measures, name, settings)

  definition = "ns:dsd-#{name} a qb:DataStructureDefinition;\n"
  (dimension_components + measure_components).each do |component|
    definition << " qb:component #{component} ;\n"
  end

  # The character before the final newline is always a ";" (from the last
  # component line, or from the header when there are none); turn it into
  # the closing ".".
  definition[-2] = '.'
  definition << "\n"
end
159
+
160
# Emits the qb:DataSet block that links the dataset to its structure
# definition, followed by a blank line.
#
# @param var [String] dataset name (run through sanitize)
# @param options [Hash] merged over defaults; no option is read afterwards
# @return [String]
def dataset(var, options={})
  name = sanitize([var]).first
  # The merge result is unused; the call is kept so that invalid +options+
  # fail exactly as before.
  defaults.merge(options)

  [
    "ns:dataset-#{name} a qb:DataSet ;",
    "rdfs:label \"#{name}\"@en ;",
    "qb:structure ns:dsd-#{name} .",
    "",
    ""
  ].join("\n")
end
170
+
171
# Emits one qb:ComponentSpecification block per dimension, then one per
# measure.
#
# Each block is labelled with the original name after strip_uri and
# strip_prefixes, and points at the property term produced by
# generate_resources.
#
# @param options [Hash] merged over defaults
# @return [Array<String>]
def component_specifications(measure_names, dimension_names, codes, var, options={})
  settings = defaults.merge(options)
  rdf_measures, rdf_dimensions, _rdf_codes = generate_resources(measure_names, dimension_names, codes, settings)
  dimension_specs = component_gen(rdf_dimensions, var, settings)
  measure_specs = component_gen(rdf_measures, var, settings)

  # Dimensions and measures differ only in the linking predicate.
  describe = lambda do |spec, original_name, role, property|
    <<-EOF.unindent
      #{spec} a qb:ComponentSpecification ;
      rdfs:label "#{strip_prefixes(strip_uri(original_name))}" ;
      qb:#{role} #{property} .

    EOF
  end

  specs = []
  rdf_dimensions.each_with_index do |property, i|
    specs << describe.call(dimension_specs[i], dimension_names[i], "dimension", property)
  end
  rdf_measures.each_with_index do |property, i|
    specs << describe.call(measure_specs[i], measure_names[i], "measure", property)
  end
  specs
end
198
+
199
# Emits one qb:DimensionProperty block per dimension.
#
# A dimension that has a code entry gets qb:codeList and rdfs:range from
# that entry. Any other dimension gets an rdfs:range only when
# options[:ranges] has an entry for its original name.
#
# The range lookup used to reference an undefined +dimension+ and raised
# NameError whenever options[:ranges] was supplied.
#
# @param dimensions [Array<String>] original dimension names
# @param codes [Array] see generate_resources
# @param var [String] unused here
# @param options [Hash] merged over defaults; :ranges is read
# @return [Array<String>]
def dimension_properties(dimensions, codes, var, options={})
  options = defaults().merge(options)
  _rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], dimensions, codes, options)
  props = []

  # Code dimension names with any surrounding <...> removed, so they can be
  # compared against the original dimension names.
  dimension_codes = rdf_codes.map { |c| c[0] =~ /^<http:/ ? c[0][1..-2] : c[0] }

  rdf_dimensions.each_with_index do |d, i|
    code_index = dimension_codes.index(dimensions[i])

    if code_index
      code = rdf_codes[code_index]
      props << <<-EOF.unindent
        #{d} a rdf:Property, qb:DimensionProperty ;
        rdfs:label "#{strip_prefixes(strip_uri(d))}"@en ;
        qb:codeList #{code[1]} ;
        rdfs:range #{code[2]} .

      EOF
    else
      props << <<-EOF.unindent
        #{d} a rdf:Property, qb:DimensionProperty ;
        rdfs:label "#{strip_prefixes(strip_uri(d))}"@en ;
      EOF

      range = options[:ranges] && options[:ranges][dimensions[i]]
      if range
        props.last << "\n rdfs:range #{range} .\n\n"
      else
        # Replace the trailing ";" with "." and add the closing blank line.
        props.last[-2] = ".\n"
      end
    end
  end

  props
end
238
+
239
# Emits one qb:MeasureProperty block per measure. An rdfs:range line is
# added when options[:ranges] has an entry for the measure's original name.
#
# @param measures [Array<String>] original measure names
# @param var [String] unused here
# @param options [Hash] merged over defaults; :ranges is read
# @return [Array<String>]
def measure_properties(measures, var, options={})
  settings = defaults.merge(options)
  resources = generate_resources(measures, [], [], settings).first

  resources.each_with_index.map do |resource, position|
    entry = <<-EOF.unindent
      #{resource} a rdf:Property, qb:MeasureProperty ;
      rdfs:label "#{strip_prefixes(strip_uri(resource))}"@en ;
    EOF

    range = settings[:ranges] && settings[:ranges][measures[position]]
    if range
      entry << " rdfs:range #{range} .\n\n"
    else
      # Replace the trailing ";" with "." and add the closing blank line.
      entry[-2] = ".\n"
    end
    entry
  end
end
260
+
261
# Emits one qb:Observation block per observation label.
#
# For every dimension and then every measure, the i-th value of that
# component is written. A nil value is skipped unless
# options[:encode_nulls] is set. A value for which is_complex? is true is
# written as a reference produced by add_node, and its encode_value output
# is appended after the observation block.
#
# Changes from the previous version: the caller's +data+ hash is no longer
# modified in place, and an unused list derived from the codes is no longer
# computed.
#
# @param measures [Array<String>] run through sanitize
# @param dimensions [Array<String>] run through sanitize
# @param codes [Array] passed to generate_resources and encode_data
# @param data [Hash] component name => value or list of values
# @param observation_labels [Array, Object] one label per observation
# @param var [String] dataset name (run through sanitize)
# @param options [Hash] merged over defaults; :no_labels and :encode_nulls
#   are read here
# @return [Array<String>]
def observations(measures, dimensions, codes, data, observation_labels, var, options={})
  var = sanitize([var]).first
  measures = sanitize(measures)
  dimensions = sanitize(dimensions)

  # Normalise every column to an Array on a copy of the hash.
  data = data.each_with_object({}) { |(k, v), columns| columns[k] = Array(v) }
  observation_labels = Array(observation_labels)
  options = defaults().merge(options)

  rdf_measures, rdf_dimensions, _rdf_codes = generate_resources(measures, dimensions, codes, options)
  data = encode_data(codes, data, var, options)

  observation_labels.each_with_index.map do |r, i|
    str = <<-EOF.unindent
      ns:obs#{r} a qb:Observation ;
      qb:dataSet ns:dataset-#{var} ;
    EOF

    str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels]

    # Position of the component within the observation (dimensions first,
    # then measures); passed to add_node / encode_value for complex values.
    obs_index = 0
    obs_nodes = []

    dimensions.each_with_index do |d, j|
      value = data[d][i]

      if !value.nil? || options[:encode_nulls]
        if is_complex?(value)
          str << " #{rdf_dimensions[j]} #{add_node(obs_index, add_node(r))} ;\n"
          obs_nodes << encode_value(value, options, obs_index, add_node(r))
        else
          str << " #{rdf_dimensions[j]} #{encode_value(value, options)} ;\n"
        end
      end

      obs_index += 1
    end

    measures.each_with_index do |m, j|
      value = data[m][i]

      if !value.nil? || options[:encode_nulls]
        if is_complex?(value)
          str << " #{rdf_measures[j]} #{add_node(obs_index, add_node(r))} ;\n"
          val = encode_value(value, options, obs_index, add_node(r))

          # Make sure the last nested statement is terminated.
          if val.last.is_a? Array
            unless val.last.last[-2] == "."
              val.last.last << ".\n"
            end
          end

          obs_nodes << val
        else
          str << " #{rdf_measures[j]} #{encode_value(value, options)} ;\n"
        end
      end

      obs_index += 1
    end

    str << " .\n\n"

    if obs_nodes.size > 0
      str << turtle_indent(obs_nodes.flatten.join("\n"))
      str << " \n\n"
    end

    str
  end
end
345
+
346
# Emits, for every code, the code-list class and its skos:ConceptScheme,
# with one skos:hasTopConcept line per distinct value of that component.
# nil values are skipped unless options[:encode_nulls] is set.
#
# @param codes [Array] see generate_resources
# @param data [Hash] component name => values (encoded via encode_data)
# @param options [Hash] merged over defaults
# @return [Array<String>]
def code_lists(codes, data, var, options={})
  settings = defaults.merge(options)
  _rdf_measures, _rdf_dimensions, rdf_codes = generate_resources([], [], codes, settings)
  encoded = encode_data(codes, data, var, settings)

  rdf_codes.map do |code|
    # Data is keyed by the bare component name, without <...>.
    component = code[0] =~ /^<.+>$/ ? code[0][1..-2] : code[0]
    title = lambda { strip_prefixes(strip_uri(code[1])) }

    list = <<-EOF.unindent
      #{code[2]} a rdfs:Class, owl:Class;
      rdfs:subClassOf skos:Concept ;
      rdfs:label "Code list for #{title.call} - codelist class"@en;
      rdfs:comment "Specifies the #{title.call} for each observation";
      rdfs:seeAlso #{code[1]} .

      #{code[1]} a skos:ConceptScheme;
      skos:prefLabel "Code list for #{title.call} - codelist scheme"@en;
      rdfs:label "Code list for #{title.call} - codelist scheme"@en;
      skos:notation "CL_#{title.call.upcase}";
      skos:note "Specifies the #{title.call} for each observation";
    EOF

    encoded[component].uniq.each do |value|
      next if value.nil? && !settings[:encode_nulls]
      list << " skos:hasTopConcept #{to_resource(value, settings)} ;\n"
    end

    list << " .\n\n"
  end
end
382
+
383
# Emits one skos:Concept block per distinct value of every coded component.
# nil values are skipped unless options[:encode_nulls] is set.
#
# @param codes [Array] see generate_resources
# @param data [Hash] component name => values (encoded via encode_data)
# @param options [Hash] merged over defaults
# @return [Array<String>]
def concept_codes(codes, data, var, options={})
  settings = defaults.merge(options)
  _rdf_measures, _rdf_dimensions, rdf_codes = generate_resources([], [], codes, settings)
  encoded = encode_data(codes, data, var, settings)

  rdf_codes.flat_map do |code|
    # Data is keyed by the bare component name, without <...>.
    component = code[0] =~ /^<.+>$/ ? code[0][1..-2] : code[0]
    kept = encoded[component].uniq.reject { |value| value.nil? && !settings[:encode_nulls] }

    kept.map do |value|
      <<-EOF.unindent
        #{to_resource(value, settings)} a skos:Concept, #{code[2]};
        skos:topConceptOf #{code[1]} ;
        skos:prefLabel "#{strip_uri(value)}" ;
        skos:inScheme #{code[1]} .

      EOF
    end
  end
end
409
+
410
+
411
# Debug helper: shortens full www.rqtl.org URIs in a Turtle string to their
# prefixed or relative forms.
#
# The dots of the host name are escaped in every pattern (three of the four
# previously left them as wildcards).
#
# @param turtle_string [String]
# @return [String] a new string; the argument is not modified
def abbreviate_known(turtle_string)
  turtle_string
    .gsub(%r{<http://www\.rqtl\.org/dc/properties/(\S+)>}, 'prop:\1')
    .gsub(%r{<http://www\.rqtl\.org/ns/dc/code/(\S+)/(\S+)>}, '<code/\1/\2>')
    .gsub(%r{<http://www\.rqtl\.org/dc/dataset/(\S+)/code/(\w+)>}, 'code:\2')
    .gsub(%r{<http://www\.rqtl\.org/dc/dataset/(\S+)/code/(\S+)>}, '<code/\2>')
end
416
+ end
417
+ end
418
+ end