publisci 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (220) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +13 -0
  5. data/Gemfile +36 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +51 -0
  8. data/README.rdoc +48 -0
  9. data/Rakefile +68 -0
  10. data/bin/bio-publisci +106 -0
  11. data/bin/bio-publisci-server +50 -0
  12. data/examples/bio-band_integration.rb +9 -0
  13. data/examples/no_magic.prov +58 -0
  14. data/examples/no_magic.rb +58 -0
  15. data/examples/orm.prov +48 -0
  16. data/examples/primer-full.prov +120 -0
  17. data/examples/primer.prov +66 -0
  18. data/examples/prov_dsl.prov +85 -0
  19. data/examples/safe_gen.rb +7 -0
  20. data/examples/visualization/primer.prov +66 -0
  21. data/examples/visualization/prov_viz.rb +140 -0
  22. data/examples/visualization/viz.rb +35 -0
  23. data/features/create_generator.feature +21 -0
  24. data/features/integration.feature +12 -0
  25. data/features/integration_steps.rb +10 -0
  26. data/features/metadata.feature +37 -0
  27. data/features/metadata_steps.rb +40 -0
  28. data/features/orm.feature +60 -0
  29. data/features/orm_steps.rb +74 -0
  30. data/features/prov_dsl.feature +14 -0
  31. data/features/prov_dsl_steps.rb +11 -0
  32. data/features/reader.feature +25 -0
  33. data/features/reader_steps.rb +61 -0
  34. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  35. data/features/store.feature +27 -0
  36. data/features/store_steps.rb +42 -0
  37. data/features/support/env.rb +13 -0
  38. data/features/writer.feature +14 -0
  39. data/features/writer_steps.rb +24 -0
  40. data/lib/bio-publisci.rb +64 -0
  41. data/lib/bio-publisci/analyzer.rb +57 -0
  42. data/lib/bio-publisci/datacube_model.rb +111 -0
  43. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
  44. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  45. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  46. data/lib/bio-publisci/dataset/data_cube.rb +418 -0
  47. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  48. data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
  49. data/lib/bio-publisci/dataset/interactive.rb +72 -0
  50. data/lib/bio-publisci/dsl/config.rb +34 -0
  51. data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
  52. data/lib/bio-publisci/dsl/dsl.rb +72 -0
  53. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  54. data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
  55. data/lib/bio-publisci/metadata/generator.rb +323 -0
  56. data/lib/bio-publisci/metadata/metadata.rb +5 -0
  57. data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
  58. data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
  59. data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
  60. data/lib/bio-publisci/metadata/prov/association.rb +107 -0
  61. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  62. data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
  63. data/lib/bio-publisci/metadata/prov/element.rb +120 -0
  64. data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
  65. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
  66. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  67. data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
  68. data/lib/bio-publisci/metadata/prov/role.rb +40 -0
  69. data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
  70. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  71. data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
  72. data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
  73. data/lib/bio-publisci/mixins/registry.rb +27 -0
  74. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  75. data/lib/bio-publisci/output.rb +27 -0
  76. data/lib/bio-publisci/parser.rb +266 -0
  77. data/lib/bio-publisci/post_processor.rb +95 -0
  78. data/lib/bio-publisci/query/query_helper.rb +123 -0
  79. data/lib/bio-publisci/r_client.rb +54 -0
  80. data/lib/bio-publisci/readers/arff.rb +49 -0
  81. data/lib/bio-publisci/readers/base.rb +57 -0
  82. data/lib/bio-publisci/readers/csv.rb +88 -0
  83. data/lib/bio-publisci/readers/dataframe.rb +67 -0
  84. data/lib/bio-publisci/readers/maf.rb +199 -0
  85. data/lib/bio-publisci/readers/r_cross.rb +112 -0
  86. data/lib/bio-publisci/readers/r_matrix.rb +176 -0
  87. data/lib/bio-publisci/store.rb +56 -0
  88. data/lib/bio-publisci/writers/arff.rb +91 -0
  89. data/lib/bio-publisci/writers/base.rb +93 -0
  90. data/lib/bio-publisci/writers/csv.rb +31 -0
  91. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  92. data/lib/bio-publisci/writers/json.rb +18 -0
  93. data/lib/r2rdf.rb +226 -0
  94. data/lib/template_bak.rb +12 -0
  95. data/lib/template_bak/publisci.rb +3 -0
  96. data/lib/vocabs/cc.rb +18 -0
  97. data/lib/vocabs/cert.rb +13 -0
  98. data/lib/vocabs/dc.rb +63 -0
  99. data/lib/vocabs/dc11.rb +23 -0
  100. data/lib/vocabs/doap.rb +45 -0
  101. data/lib/vocabs/exif.rb +168 -0
  102. data/lib/vocabs/foaf.rb +69 -0
  103. data/lib/vocabs/geo.rb +13 -0
  104. data/lib/vocabs/http.rb +26 -0
  105. data/lib/vocabs/ma.rb +78 -0
  106. data/lib/vocabs/owl.rb +59 -0
  107. data/lib/vocabs/rdfs.rb +17 -0
  108. data/lib/vocabs/rsa.rb +12 -0
  109. data/lib/vocabs/rss.rb +14 -0
  110. data/lib/vocabs/sioc.rb +93 -0
  111. data/lib/vocabs/skos.rb +36 -0
  112. data/lib/vocabs/wot.rb +21 -0
  113. data/lib/vocabs/xhtml.rb +9 -0
  114. data/lib/vocabs/xsd.rb +58 -0
  115. data/resources/maf_example.maf +10 -0
  116. data/resources/maf_rdf.ttl +1173 -0
  117. data/resources/primer.ttl +38 -0
  118. data/resources/queries/code_resources.rq +10 -0
  119. data/resources/queries/codes.rq +18 -0
  120. data/resources/queries/dataset.rq +7 -0
  121. data/resources/queries/dimension_ranges.rq +8 -0
  122. data/resources/queries/dimensions.rq +12 -0
  123. data/resources/queries/gene.rq +16 -0
  124. data/resources/queries/hugo_to_ensembl.rq +7 -0
  125. data/resources/queries/maf_column.rq +26 -0
  126. data/resources/queries/measures.rq +12 -0
  127. data/resources/queries/observation_labels.rq +8 -0
  128. data/resources/queries/observations.rq +13 -0
  129. data/resources/queries/patient.rq +11 -0
  130. data/resources/queries/patient_list.rq +11 -0
  131. data/resources/queries/patients_with_mutation.rq +18 -0
  132. data/resources/queries/properties.rq +8 -0
  133. data/resources/queries/test.rq +3 -0
  134. data/resources/weather.numeric.arff +28 -0
  135. data/scripts/get_gene_lengths.rb +50 -0
  136. data/scripts/islet_mlratio.rb +6 -0
  137. data/scripts/scan_islet.rb +6 -0
  138. data/scripts/update_reference.rb +25 -0
  139. data/server/helpers.rb +215 -0
  140. data/server/public/src-min-noconflict/LICENSE +24 -0
  141. data/server/public/src-min-noconflict/ace.js +11 -0
  142. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  143. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  144. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  145. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  146. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  147. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  148. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  149. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  150. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  151. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  152. data/server/public/src-min-noconflict/ext-split.js +1 -0
  153. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  154. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  155. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  156. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  157. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  158. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  159. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  160. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  161. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  162. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  163. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  164. data/server/public/src-min-noconflict/worker-css.js +1 -0
  165. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  166. data/server/public/src-min-noconflict/worker-json.js +1 -0
  167. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  168. data/server/public/src-min-noconflict/worker-php.js +1 -0
  169. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  170. data/server/routes.rb +123 -0
  171. data/server/views/dsl.haml +65 -0
  172. data/server/views/dump.haml +3 -0
  173. data/server/views/import.haml +35 -0
  174. data/server/views/new_repository.haml +25 -0
  175. data/server/views/query.haml +28 -0
  176. data/server/views/repository.haml +25 -0
  177. data/spec/ORM/data_cube_orm_spec.rb +33 -0
  178. data/spec/ORM/prov_model_spec.rb +72 -0
  179. data/spec/analyzer_spec.rb +36 -0
  180. data/spec/bnode_spec.rb +66 -0
  181. data/spec/csv/bacon.csv +4 -0
  182. data/spec/csv/moar_bacon.csv +11 -0
  183. data/spec/data_cube_spec.rb +169 -0
  184. data/spec/dataset_for_spec.rb +77 -0
  185. data/spec/dsl_spec.rb +134 -0
  186. data/spec/generators/csv_spec.rb +44 -0
  187. data/spec/generators/dataframe_spec.rb +44 -0
  188. data/spec/generators/maf_spec.rb +40 -0
  189. data/spec/generators/r_cross_spec.rb +51 -0
  190. data/spec/generators/r_matrix_spec.rb +44 -0
  191. data/spec/length_lookup_spec.rb +0 -0
  192. data/spec/maf_query_spec.rb +343 -0
  193. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  194. data/spec/prov/activity_spec.rb +74 -0
  195. data/spec/prov/agent_spec.rb +54 -0
  196. data/spec/prov/association_spec.rb +55 -0
  197. data/spec/prov/config_spec.rb +28 -0
  198. data/spec/prov/derivation_spec.rb +30 -0
  199. data/spec/prov/entity_spec.rb +52 -0
  200. data/spec/prov/role_spec.rb +94 -0
  201. data/spec/prov/usage_spec.rb +98 -0
  202. data/spec/queries/integrity/1.rq +21 -0
  203. data/spec/queries/integrity/11.rq +29 -0
  204. data/spec/queries/integrity/12.rq +37 -0
  205. data/spec/queries/integrity/14.rq +25 -0
  206. data/spec/queries/integrity/19_1.rq +21 -0
  207. data/spec/queries/integrity/19_2.rq +15 -0
  208. data/spec/queries/integrity/2.rq +22 -0
  209. data/spec/queries/integrity/3.rq +19 -0
  210. data/spec/queries/integrity/4.rq +13 -0
  211. data/spec/queries/integrity/5.rq +14 -0
  212. data/spec/r_builder_spec.rb +33 -0
  213. data/spec/resource/.RData +0 -0
  214. data/spec/resource/example.Rhistory +3 -0
  215. data/spec/spec_helper.rb +17 -0
  216. data/spec/turtle/bacon +147 -0
  217. data/spec/turtle/reference +2064 -0
  218. data/spec/turtle/weather +275 -0
  219. data/spec/writer_spec.rb +75 -0
  220. metadata +589 -0
@@ -0,0 +1,112 @@
1
module PubliSci
  module Readers
    # Reads a "cross" object held in an R session (anything exposing
    # `$pheno` and `$geno$'<chromosome>'` with `data` and `map` members) and
    # writes it out as Turtle files: one structure file plus one observation
    # file per chromosome.
    # NOTE(review): the layout matches an R/qtl cross object - confirm.
    class RCross
      include PubliSci::Dataset::DataCube
      include PubliSci::Readers::Output

      # Chromosomes processed when options[:chromosomes] is not given.
      DEFAULT_CHROMOSOMES = ((1..19).map(&:to_s) + ["X"]).freeze

      # Writes "<outfile_base>_structure.ttl" and, for every chromosome,
      # "<outfile_base>_<chromosome>.ttl" containing the prefixes followed by
      # the observations of every individual.
      #
      # client       - R connection responding to #eval
      # var          - name of the R variable holding the cross object
      # outfile_base - path prefix for the generated files
      # options      - :quiet suppresses progress output; :chromosomes
      #                overrides DEFAULT_CHROMOSOMES; the hash is also passed
      #                through to the DataCube helpers
      #
      # Returns an array with one entry (the individual count) per chromosome.
      def generate_n3(client, var, outfile_base, options={})
        meas = measures(client, var, options)
        dim = dimensions(client, var, options)
        code_lists = codes(client, var, options)

        # File.open rather than Kernel#open: the latter would run a command
        # if the path happened to start with "|".
        File.open(outfile_base + '_structure.ttl', 'w') { |f| f.write structure(client, var, options) }

        n_individuals = client.eval("length(#{var}$pheno[[1]])").payload.first
        chromosome_list = (options[:chromosomes] || DEFAULT_CHROMOSOMES).map(&:to_s)

        chromosome_list.map do |chrom|
          # One handle per chromosome instead of reopening the file in append
          # mode for every individual.
          File.open(outfile_base + "_#{chrom}.ttl", 'w') do |f|
            f.write prefixes(var, options)

            # number of map entries (markers) on this chromosome
            entries_per_individual = client.eval("length(#{var}$geno$'#{chrom}'$map)").to_ruby

            # genotype data for the current chromosome
            geno_chr = client.eval("#{var}$geno$'#{chrom}'")

            n_individuals.times do |indi|
              obs_data = observation_data(client, var, chrom, indi, geno_chr, entries_per_individual, options)
              labels = labels_for(obs_data, chrom, indi)
              observations(meas, dim, code_lists, obs_data, labels, var, options).each { |obs| f.write obs }
              # 1-based so the final line reads n/n
              puts "(#{chrom}) #{indi + 1}/#{n_individuals}" unless options[:quiet]
            end
          end
        end
      end

      # Builds the structure document as one string: prefixes, data structure
      # definition, dataset, component specifications and measure properties.
      def structure(client, var, options={})
        meas = measures(client, var, options)
        dim = dimensions(client, var, options)
        code_lists = codes(client, var, options)

        str = prefixes(var, options)
        str << data_structure_definition(meas, dim, code_lists, var, options)
        str << dataset(var, options)
        component_specifications(meas, dim, code_lists, var, options).each { |c| str << c }
        measure_properties(meas, var, options).each { |m| str << m }

        str
      end

      # Measure names: the phenotype column names (restricted to
      # options[:measures] when given) plus "genotype", "markerpos", "marker".
      def measures(client, var, options={})
        pheno_names = client.eval("names(#{var}$pheno)").to_ruby
        selected = options[:measures] ? (pheno_names & options[:measures]) : pheno_names
        selected | ["genotype", "markerpos", "marker"]
      end

      # No dimensions are generated for this reader.
      def dimensions(client, var, options={})
        []
      end

      # No coded properties are generated for this reader.
      def codes(client, var, options={})
        []
      end

      # Observation labels "<chr>_<n>" for one individual. n continues across
      # individuals: individual i (0-based) gets size*i+1 .. size*(i+1), where
      # size is the length of the first column in +data+.
      def labels_for(data, chr, individual, options={})
        per_individual = data.first.last.size
        first_label = (per_individual * individual) + 1
        last_label = per_individual * (individual + 1)
        (first_label..last_label).map { |n| "#{chr}_#{n}" }
      end

      # Collects the column data for one individual on one chromosome.
      #
      # row_individ            - 0-based index of the individual
      # geno_chr               - evaluated `var$geno$'chr'` object
      # entries_per_individual - number of rows to emit for the individual
      #
      # Returns a Hash of column name => flat Array with the keys "chr",
      # "genotype", "individual", "marker", "markerpos" and one key per
      # phenotype column.
      def observation_data(client, var, chr, row_individ, geno_chr, entries_per_individual, options={})
        data = {}
        %w[chr genotype individual marker markerpos].each { |key| data[key] = [] }

        pheno_names = client.eval("names(#{var}$pheno)").to_ruby
        pheno_names.each { |name| data[name] = [] }

        # the individual index and its phenotype values repeat on every row
        data["individual"] << Array.new(entries_per_individual, row_individ)
        pheno_names.each do |name|
          value = client.eval("#{var}$pheno$#{name}").to_ruby[row_individ]
          data[name] << Array.new(entries_per_individual, value)
        end

        num_markers = geno_chr.payload.first.to_ruby.column_size
        data["chr"] << Array.new(num_markers, chr)
        data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
        data["marker"] << client.eval("names(#{var}$geno$'#{chr}'$map)").payload
        data["markerpos"] << geno_chr.payload["map"].to_a

        # every column was pushed as a nested array; flatten in place
        data.each_value(&:flatten!)
        data
      end

      # Size of the first phenotype column, i.e. the number of individuals.
      def num_individuals(client, var, options={})
        client.eval("#{var}$pheno").payload.first.to_ruby.size
      end
    end
  end
end
@@ -0,0 +1,176 @@
1
module PubliSci
  module Readers
    # Reads a matrix (or data frame, with options[:type] == :dataframe) held
    # in an R session and emits one observation per cell as Turtle.
    #
    # NOTE: this is pretty much hard coded for Karl's application right now,
    # and doesn't do any dimension or code generation. Since it's a set of LOD
    # scores indexed by dimension and marker, the usual datacube generator
    # won't work (I think). In the future, adding an option to specify this
    # kind of dataset would probably be useful.
    class RMatrix
      include PubliSci::Dataset::DataCube

      # Writes "<outfile_base>_structure.ttl" and then the observations of
      # every column ("probe") of +var+.
      #
      # options:
      #   :probes_per_file  - columns per output file (default 100)
      #   :type             - :dataframe selects names()/[[ ]] access instead
      #                       of colnames()/[, ] access
      #   :print            - print prefixes and observations to stdout
      #   :output           - :string returns everything as one String
      #   :quiet            - suppress progress output in file mode
      #
      # Unless :print or :output == :string is set, observations go to
      # "<outfile_base>_<k>.ttl", where k is the 0-based column index divided
      # by :probes_per_file.
      #
      # Returns the generated String when :output == :string, otherwise nil.
      def generate_n3(client, var, outfile_base, options={})
        meas = measures(client, var, options)
        dim = dimensions(client, var, options)
        code_lists = codes(client, var, options)

        outvar = sanitize([var]).first

        probes_per_file = options[:probes_per_file] || 100
        col_select = options[:type] == :dataframe ? "names" : "colnames"
        as_string = options[:output] == :string
        to_files = !(options[:print] || as_string)

        # File.open rather than Kernel#open: the latter would run a command
        # if the path happened to start with "|".
        File.open(outfile_base + '_structure.ttl', 'w') { |f| f.write structure(client, var, outvar, options) }

        probes = client.eval("#{col_select}(#{var})").to_ruby
        if probes.nil?
          # unnamed columns: number them so every probe has an identifier
          client.eval("colnames(#{var})=1:ncol(#{var})")
          probes = client.eval("#{col_select}(#{var})").to_ruby
        end
        markers = rows(client, var, options)

        puts prefixes(var, options) if options[:print]
        str = prefixes(var, options) if as_string

        probes.each_with_index do |probe, index|
          # The file is chosen from the 0-based index for BOTH the
          # truncate-and-prefix step and the append step. Using the 1-based
          # probe number for the append sent the last probe of each bucket to
          # the next file, which the following iteration then truncated.
          path = outfile_base + "_#{index / probes_per_file}.ttl"
          if to_files && index % probes_per_file == 0
            File.open(path, 'w') { |f| f.write prefixes(var, options) }
          end

          probe_number = index + 1 # R indices are 1-based
          obs_data = observation_data(client, var, probe_number, markers, options)
          labels = labels_for(client, var, probe)
          obs_list = observations(meas, dim, code_lists, obs_data, labels, outvar, options)

          obs_list.each { |obs| puts obs } if options[:print]
          obs_list.each { |obs| str << obs } if as_string

          if to_files
            File.open(path, 'a') { |f| obs_list.each { |obs| f.write obs } }
            puts "#{probe_number}/#{probes.size}" unless options[:quiet]
          end
        end

        str if as_string
      end

      # Builds the structure document as one string: prefixes, data structure
      # definition, dataset, component specifications and measure properties.
      # NOTE(review): the definition and dataset use +outvar+ while the other
      # sections use +var+ - confirm this mix is intended.
      def structure(client, var, outvar, options={})
        meas = measures(client, var, options)
        dim = dimensions(client, var, options)
        code_lists = codes(client, var, options)

        str = prefixes(var, options)
        str << data_structure_definition(meas, [], code_lists, outvar, options)
        str << dataset(outvar, options)
        component_specifications(meas, dim, code_lists, var, options).each { |c| str << c }
        measure_properties(meas, var, options).each { |m| str << m }

        str
      end

      # For now just make everything a measure: options[:measures] when given,
      # otherwise probe / marker / value.
      def measures(client, var, options={})
        options[:measures] || ["probe", "marker", "value"]
      end

      # No dimensions are generated for this reader.
      def dimensions(client, var, options={})
        []
      end

      # No coded properties are generated for this reader.
      def codes(client, var, options={})
        []
      end

      # Observation labels "<probe_id>_<n>" with n running from 1 to the
      # number of rows of +var+.
      def labels_for(connection, var, probe_id, options={})
        row_count = rows(connection, var, options).size
        (1..row_count).map { |n| "#{probe_id}_#{n}" }
      end

      # Row names of +var+, or 1..nrow(var) when row.names() evaluates to NULL.
      def rows(connection, var, options={})
        row_names = connection.eval("row.names(#{var})")
        # hacky solution because rserve client's .to_ruby method doesn't fully work
        if row_names == connection.eval('NULL')
          (1..connection.eval("nrow(#{var})").payload.first).to_a
        else
          row_names.payload
        end
      end

      # Collects the column data for one probe (column) of +var+.
      #
      # probe_number - 1-based column index
      # row_names    - labels for the rows, as returned by #rows
      #
      # Returns a Hash with three keys (default "probe", "marker", "value",
      # overridable positionally through options[:measures]), each mapping to
      # an Array with one entry per row.
      def observation_data(client, var, probe_number, row_names, options={})
        chosen = options[:measures] || []
        col_label = (chosen[0] || "probe").to_s
        row_label = (chosen[1] || "marker").to_s
        val_label = (chosen[2] || "value").to_s

        data = {}
        data[col_label] = []
        data[row_label] = []
        data[val_label] = []

        dataframe = options[:type] == :dataframe
        col_select = dataframe ? "names" : "colnames"
        column_expr = dataframe ? "#{var}[[#{probe_number}]]" : "#{var}[,#{probe_number}]"

        probe_obj = client.eval(column_expr).to_ruby
        probe_id = client.eval("#{col_select}(#{var})[[#{probe_number}]]").to_ruby

        # the probe identifier repeats on every row
        data[col_label] = Array.new(probe_obj.size, probe_id)
        probe_obj.each_with_index do |lod, i|
          data[row_label] << row_names[i]
          data[val_label] << lod
        end

        data.each_value(&:flatten!)
        data
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
module PubliSci
  # Handles connection and messaging to/from the triple store.
  #
  # Two store types are supported through options[:type]:
  #   :fourstore - an HTTP endpoint at options[:url], loaded through curl
  #   :graph     - an in-memory RDF::Repository supplied by the caller
  class Store
    include PubliSci::Query

    # URL-encoded "http://rqtl.org/", prepended to every graph name.
    GRAPH_URI_PREFIX = "http%3A%2F%2Frqtl.org%2F".freeze
    TURTLE_MIME_TYPE = "application/x-turtle".freeze

    # Option values used when the caller does not override them.
    def defaults
      {
        type: :fourstore,
        url: "http://localhost:8080", #TODO port etc should eventually be extracted from URI if given
        replace: false
      }
    end

    # Loads one Turtle file into the store.
    #
    # file  - path of the file to load
    # graph - for :graph stores the RDF::Repository to load into; for
    #         :fourstore the graph name appended to GRAPH_URI_PREFIX
    #
    # Returns the repository (:graph), curl's standard output (:fourstore),
    # or nil for any other store type.
    # Raises ArgumentError when a :graph store is not given an RDF::Repository.
    def add(file, graph)
      case @options[:type]
      when :graph
        # `throw` is for non-local jumps; an invalid argument must be raised.
        raise ArgumentError, "please provide an RDF::Repository" unless graph.is_a? RDF::Repository
        graph.load(file)
        @store = graph
      when :fourstore
        command =
          if @options[:replace]
            ["curl", "-T", file.to_s, "-H", "Content-Type: #{TURTLE_MIME_TYPE}",
             "#{@options[:url]}/data/#{GRAPH_URI_PREFIX}#{graph}"]
          else
            ["curl", "--data-urlencode", "data@#{file}",
             "-d", "graph=#{GRAPH_URI_PREFIX}#{graph}",
             "-d", "mime-type=#{TURTLE_MIME_TYPE}",
             "#{@options[:url]}/data/"]
          end
        # Argument-vector form: nothing is passed through a shell, so paths
        # with spaces work and file/graph names cannot inject commands.
        IO.popen(command, &:read)
      end
    end

    # Loads every regular file in +dir+ into +graph+, printing progress and
    # the result of each #add call.
    #
    # pattern - a Regexp matched against file names, or :turtle / :ttl to
    #           select names ending in ".ttl"; nil loads every file
    #
    # Returns the list of file names (relative to +dir+) that were processed.
    def add_all(dir, graph, pattern=nil)
      pattern = /.+\.ttl\z/ if pattern == :turtle || pattern == :ttl

      # Dir.entries also lists ".", ".." and sub-directories; keep files only.
      files = Dir.entries(dir).select { |name| File.file?(File.join(dir, name)) }
      files = files.grep(pattern) if pattern.is_a? Regexp
      nfiles = files.size

      files.each_with_index do |file, index|
        puts file + " #{index + 1}/#{nfiles} files"
        # Dir.entries yields bare names, so the directory has to be re-attached.
        puts add(File.join(dir, file), graph)
      end
    end

    # options - overrides for #defaults (:type, :url, :replace)
    def initialize(options={})
      @options = defaults.merge(options)
    end

    # Runs a query string against the repository last passed to #add (:graph)
    # or against the configured URL (:fourstore). Returns nil for any other
    # store type.
    def query(string)
      case @options[:type]
      when :graph
        execute(string, @store, :graph)
      when :fourstore
        execute(string, @options[:url], :fourstore)
      end
    end

    # The configured store URL.
    def url
      @options[:url]
    end
  end
end
@@ -0,0 +1,91 @@
1
module PubliSci
  module Writers
    # Renders Data Cube content as ARFF text (@RELATION / @ATTRIBUTE / @DATA).
    class ARFF < Base
      # include PubliSci::Query
      # include PubliSci::Parser
      # include PubliSci::Analyzer

      # Assembles the ARFF document.
      #
      # relation   - relation name; also capitalised for the title comment
      # attributes - attribute name => ARFF type, emitted sorted by name
      # data       - enumerable of [id, {attribute => value}] pairs; each row
      #              lists its values ordered by attribute name
      # source     - description of the RDF source, used in the header comment
      #
      # Returns the document as a String.
      def build_arff(relation, attributes, data, source)
        str = <<-EOS
% 1. Title: #{relation.capitalize} Database
%
% 2. Sources:
% (a) Generated from RDF source #{source}
%
@RELATION #{relation}

        EOS

        Hash[attributes.sort].each do |attribute, type|
          str << "@ATTRIBUTE #{attribute} #{type}\n"
        end

        str << "\n@DATA\n"
        data.each do |_id, values|
          str << Hash[values.sort].values.join(',') + "\n"
        end

        str
      end

      # Loads a Turtle file into a temporary repository and converts it to
      # ARFF, using the dataset name found in the file as the relation name.
      # With +verbose+ set, progress is printed to stdout.
      def from_turtle(turtle_file, verbose=false)
        puts "loading #{turtle_file}" if verbose
        repo = RDF::Repository.load(turtle_file)
        puts "loaded #{repo.size} statements into temporary repo" if verbose

        dims = dimensions(repo)
        meas = measures(repo)
        data = observations(repo)

        relation = dataSet(repo)
        code_lists = codes(repo)

        build_arff(relation, arff_attribute_types(dims, meas, code_lists, data), data, turtle_file)
      end

      # Converts the contents of +repo+ to ARFF.
      #
      # dataset - restricts the queries to this dataset; when nil the
      #           queries run unrestricted and the name is looked up afterwards
      # title   - relation name; defaults to the dataset
      def from_store(repo, dataset=nil, title=nil, verbose=false)
        # data = observation_hash(execute_from_file("observations.rq",repo,:graph,{"%{dataSet}"=>"<#{dataSet}>"}), true)

        dims = dimensions(repo, dataset)
        meas = measures(repo, dataset)
        data = observations(repo, dataset)
        code_lists = codes(repo, dataset)
        attributes = arff_attribute_types(dims, meas, code_lists, data)

        dataset ||= dataSet(repo)
        title ||= dataset
        build_arff(title, attributes, data, dataset)
      end

      private

      # Maps every dimension and measure to an ARFF type based on the range
      # recommended for its observed values: "xsd:int" -> integer,
      # "xsd:double" -> real, :coded -> the code list for dimensions and
      # string for measures. Any other recommendation leaves the type nil.
      def arff_attribute_types(dims, meas, code_lists, data)
        (dims | meas).each_with_object({}) do |component, attributes|
          column = data.map { |o| o[1][component] }
          attributes[component] =
            case recommend_range(column)
            when "xsd:int" then "integer"
            when "xsd:double" then "real"
            when :coded
              if dims.include? component
                "{#{code_lists[component].join(', ')}}"
              else
                "string"
              end
            end
        end
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
module PubliSci
  module Writers
    # Shared query helpers for the writers: each method runs one of the
    # bundled .rq query files against a repository and reshapes the result.
    class Base
      include PubliSci::Query
      include PubliSci::Parser
      include PubliSci::Analyzer

      # Normalises +input+ to an RDF::Repository: an existing file path is
      # loaded, a repository is returned unchanged, and anything else raises.
      def handle_input(input)
        case input
        when String
          raise "UnkownStringInput: #{input}" unless File.exist? input
          RDF::Repository.load(input)
        when RDF::Repository
          input
        else
          raise "UnkownInput: #{input}, #{input.class}"
        end
      end

      # Values of the +select+ binding from dimensions.rq, as strings.
      # When +data_set+ is given, "?dataSet" is substituted with its URI.
      def dimensions(input, data_set=nil, select=:label)
        query_args = ["dimensions.rq", handle_input(input), :graph]
        query_args << {"?dataSet"=>"<#{data_set}>"} if data_set

        execute_from_file(*query_args).to_h.map { |row| row[select].to_s }
      end

      # Values of the +select+ binding from measures.rq, as strings.
      # When +data_set+ is given, "?dataSet" is substituted with its URI.
      def measures(input, data_set=nil, select=:label)
        query_args = ["measures.rq", handle_input(input), :graph]
        query_args << {"?dataSet"=>"<#{data_set}>"} if data_set

        execute_from_file(*query_args).to_h.map { |row| row[select].to_s }
      end

      # Result of observations.rq passed through observation_hash together
      # with +shorten_url+.
      def observations(input, data_set = nil, shorten_url = true)
        query_args = ["observations.rq", handle_input(input), :graph]
        query_args << {"?dataSet"=>"<#{data_set}>"} if data_set

        observation_hash(execute_from_file(*query_args), shorten_url)
      end

      # The +select+ binding of the first dataset.rq result, as a string.
      def dataSet(input, select = :label)
        first_row = execute_from_file("dataset.rq", handle_input(input), :graph).to_h.first
        first_row[select].to_s
      end

      # Groups the codes.rq results into a Hash: the first value of each row
      # (as a string) maps to the list of last values seen with it.
      # NOTE(review): +select+ is accepted but not used here.
      def codes(input, data_set = nil, select = :label)
        query_args = ["codes.rq", handle_input(input), :graph]
        query_args << {"?dataSet"=>"<#{data_set}>"} if data_set

        execute_from_file(*query_args).to_h.each_with_object({}) do |row, grouped|
          values = row.values.map(&:to_s)
          (grouped[values.first] ||= []) << values.last
        end
      end

      # Loads +turtle_file+ into a repository and delegates to #repo_to_ruby.
      def turtle_to_ruby(turtle_file, select_dataset=nil, shorten_url=true)
        repo_to_ruby(RDF::Repository.load(turtle_file), select_dataset, shorten_url)
      end

      # Collects measures, dimensions, coded dimensions and observations of a
      # dataset into one Hash. When +select_dataset+ is not given, the
      # :dataset binding of the first dataset in +repo+ is used.
      def repo_to_ruby(repo,select_dataset=nil, shorten_url=true)
        select_dataset ||= dataSet(repo, :dataset)

        dims = dimensions(repo, select_dataset)
        meas = measures(repo, select_dataset)
        coded = codes(repo, select_dataset)
        data = observations(repo, select_dataset, shorten_url)

        {measures: meas, dimensions: dims, coded_dimensions: coded, data: data}
      end
    end
  end
end
+ end