publisci 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +13 -0
  5. data/Gemfile +36 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +51 -0
  8. data/README.rdoc +48 -0
  9. data/Rakefile +68 -0
  10. data/bin/bio-publisci +106 -0
  11. data/bin/bio-publisci-server +50 -0
  12. data/examples/bio-band_integration.rb +9 -0
  13. data/examples/no_magic.prov +58 -0
  14. data/examples/no_magic.rb +58 -0
  15. data/examples/orm.prov +48 -0
  16. data/examples/primer-full.prov +120 -0
  17. data/examples/primer.prov +66 -0
  18. data/examples/prov_dsl.prov +85 -0
  19. data/examples/safe_gen.rb +7 -0
  20. data/examples/visualization/primer.prov +66 -0
  21. data/examples/visualization/prov_viz.rb +140 -0
  22. data/examples/visualization/viz.rb +35 -0
  23. data/features/create_generator.feature +21 -0
  24. data/features/integration.feature +12 -0
  25. data/features/integration_steps.rb +10 -0
  26. data/features/metadata.feature +37 -0
  27. data/features/metadata_steps.rb +40 -0
  28. data/features/orm.feature +60 -0
  29. data/features/orm_steps.rb +74 -0
  30. data/features/prov_dsl.feature +14 -0
  31. data/features/prov_dsl_steps.rb +11 -0
  32. data/features/reader.feature +25 -0
  33. data/features/reader_steps.rb +61 -0
  34. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  35. data/features/store.feature +27 -0
  36. data/features/store_steps.rb +42 -0
  37. data/features/support/env.rb +13 -0
  38. data/features/writer.feature +14 -0
  39. data/features/writer_steps.rb +24 -0
  40. data/lib/bio-publisci.rb +64 -0
  41. data/lib/bio-publisci/analyzer.rb +57 -0
  42. data/lib/bio-publisci/datacube_model.rb +111 -0
  43. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
  44. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  45. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  46. data/lib/bio-publisci/dataset/data_cube.rb +418 -0
  47. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  48. data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
  49. data/lib/bio-publisci/dataset/interactive.rb +72 -0
  50. data/lib/bio-publisci/dsl/config.rb +34 -0
  51. data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
  52. data/lib/bio-publisci/dsl/dsl.rb +72 -0
  53. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  54. data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
  55. data/lib/bio-publisci/metadata/generator.rb +323 -0
  56. data/lib/bio-publisci/metadata/metadata.rb +5 -0
  57. data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
  58. data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
  59. data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
  60. data/lib/bio-publisci/metadata/prov/association.rb +107 -0
  61. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  62. data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
  63. data/lib/bio-publisci/metadata/prov/element.rb +120 -0
  64. data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
  65. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
  66. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  67. data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
  68. data/lib/bio-publisci/metadata/prov/role.rb +40 -0
  69. data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
  70. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  71. data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
  72. data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
  73. data/lib/bio-publisci/mixins/registry.rb +27 -0
  74. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  75. data/lib/bio-publisci/output.rb +27 -0
  76. data/lib/bio-publisci/parser.rb +266 -0
  77. data/lib/bio-publisci/post_processor.rb +95 -0
  78. data/lib/bio-publisci/query/query_helper.rb +123 -0
  79. data/lib/bio-publisci/r_client.rb +54 -0
  80. data/lib/bio-publisci/readers/arff.rb +49 -0
  81. data/lib/bio-publisci/readers/base.rb +57 -0
  82. data/lib/bio-publisci/readers/csv.rb +88 -0
  83. data/lib/bio-publisci/readers/dataframe.rb +67 -0
  84. data/lib/bio-publisci/readers/maf.rb +199 -0
  85. data/lib/bio-publisci/readers/r_cross.rb +112 -0
  86. data/lib/bio-publisci/readers/r_matrix.rb +176 -0
  87. data/lib/bio-publisci/store.rb +56 -0
  88. data/lib/bio-publisci/writers/arff.rb +91 -0
  89. data/lib/bio-publisci/writers/base.rb +93 -0
  90. data/lib/bio-publisci/writers/csv.rb +31 -0
  91. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  92. data/lib/bio-publisci/writers/json.rb +18 -0
  93. data/lib/r2rdf.rb +226 -0
  94. data/lib/template_bak.rb +12 -0
  95. data/lib/template_bak/publisci.rb +3 -0
  96. data/lib/vocabs/cc.rb +18 -0
  97. data/lib/vocabs/cert.rb +13 -0
  98. data/lib/vocabs/dc.rb +63 -0
  99. data/lib/vocabs/dc11.rb +23 -0
  100. data/lib/vocabs/doap.rb +45 -0
  101. data/lib/vocabs/exif.rb +168 -0
  102. data/lib/vocabs/foaf.rb +69 -0
  103. data/lib/vocabs/geo.rb +13 -0
  104. data/lib/vocabs/http.rb +26 -0
  105. data/lib/vocabs/ma.rb +78 -0
  106. data/lib/vocabs/owl.rb +59 -0
  107. data/lib/vocabs/rdfs.rb +17 -0
  108. data/lib/vocabs/rsa.rb +12 -0
  109. data/lib/vocabs/rss.rb +14 -0
  110. data/lib/vocabs/sioc.rb +93 -0
  111. data/lib/vocabs/skos.rb +36 -0
  112. data/lib/vocabs/wot.rb +21 -0
  113. data/lib/vocabs/xhtml.rb +9 -0
  114. data/lib/vocabs/xsd.rb +58 -0
  115. data/resources/maf_example.maf +10 -0
  116. data/resources/maf_rdf.ttl +1173 -0
  117. data/resources/primer.ttl +38 -0
  118. data/resources/queries/code_resources.rq +10 -0
  119. data/resources/queries/codes.rq +18 -0
  120. data/resources/queries/dataset.rq +7 -0
  121. data/resources/queries/dimension_ranges.rq +8 -0
  122. data/resources/queries/dimensions.rq +12 -0
  123. data/resources/queries/gene.rq +16 -0
  124. data/resources/queries/hugo_to_ensembl.rq +7 -0
  125. data/resources/queries/maf_column.rq +26 -0
  126. data/resources/queries/measures.rq +12 -0
  127. data/resources/queries/observation_labels.rq +8 -0
  128. data/resources/queries/observations.rq +13 -0
  129. data/resources/queries/patient.rq +11 -0
  130. data/resources/queries/patient_list.rq +11 -0
  131. data/resources/queries/patients_with_mutation.rq +18 -0
  132. data/resources/queries/properties.rq +8 -0
  133. data/resources/queries/test.rq +3 -0
  134. data/resources/weather.numeric.arff +28 -0
  135. data/scripts/get_gene_lengths.rb +50 -0
  136. data/scripts/islet_mlratio.rb +6 -0
  137. data/scripts/scan_islet.rb +6 -0
  138. data/scripts/update_reference.rb +25 -0
  139. data/server/helpers.rb +215 -0
  140. data/server/public/src-min-noconflict/LICENSE +24 -0
  141. data/server/public/src-min-noconflict/ace.js +11 -0
  142. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  143. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  144. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  145. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  146. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  147. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  148. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  149. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  150. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  151. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  152. data/server/public/src-min-noconflict/ext-split.js +1 -0
  153. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  154. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  155. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  156. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  157. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  158. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  159. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  160. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  161. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  162. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  163. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  164. data/server/public/src-min-noconflict/worker-css.js +1 -0
  165. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  166. data/server/public/src-min-noconflict/worker-json.js +1 -0
  167. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  168. data/server/public/src-min-noconflict/worker-php.js +1 -0
  169. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  170. data/server/routes.rb +123 -0
  171. data/server/views/dsl.haml +65 -0
  172. data/server/views/dump.haml +3 -0
  173. data/server/views/import.haml +35 -0
  174. data/server/views/new_repository.haml +25 -0
  175. data/server/views/query.haml +28 -0
  176. data/server/views/repository.haml +25 -0
  177. data/spec/ORM/data_cube_orm_spec.rb +33 -0
  178. data/spec/ORM/prov_model_spec.rb +72 -0
  179. data/spec/analyzer_spec.rb +36 -0
  180. data/spec/bnode_spec.rb +66 -0
  181. data/spec/csv/bacon.csv +4 -0
  182. data/spec/csv/moar_bacon.csv +11 -0
  183. data/spec/data_cube_spec.rb +169 -0
  184. data/spec/dataset_for_spec.rb +77 -0
  185. data/spec/dsl_spec.rb +134 -0
  186. data/spec/generators/csv_spec.rb +44 -0
  187. data/spec/generators/dataframe_spec.rb +44 -0
  188. data/spec/generators/maf_spec.rb +40 -0
  189. data/spec/generators/r_cross_spec.rb +51 -0
  190. data/spec/generators/r_matrix_spec.rb +44 -0
  191. data/spec/length_lookup_spec.rb +0 -0
  192. data/spec/maf_query_spec.rb +343 -0
  193. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  194. data/spec/prov/activity_spec.rb +74 -0
  195. data/spec/prov/agent_spec.rb +54 -0
  196. data/spec/prov/association_spec.rb +55 -0
  197. data/spec/prov/config_spec.rb +28 -0
  198. data/spec/prov/derivation_spec.rb +30 -0
  199. data/spec/prov/entity_spec.rb +52 -0
  200. data/spec/prov/role_spec.rb +94 -0
  201. data/spec/prov/usage_spec.rb +98 -0
  202. data/spec/queries/integrity/1.rq +21 -0
  203. data/spec/queries/integrity/11.rq +29 -0
  204. data/spec/queries/integrity/12.rq +37 -0
  205. data/spec/queries/integrity/14.rq +25 -0
  206. data/spec/queries/integrity/19_1.rq +21 -0
  207. data/spec/queries/integrity/19_2.rq +15 -0
  208. data/spec/queries/integrity/2.rq +22 -0
  209. data/spec/queries/integrity/3.rq +19 -0
  210. data/spec/queries/integrity/4.rq +13 -0
  211. data/spec/queries/integrity/5.rq +14 -0
  212. data/spec/r_builder_spec.rb +33 -0
  213. data/spec/resource/.RData +0 -0
  214. data/spec/resource/example.Rhistory +3 -0
  215. data/spec/spec_helper.rb +17 -0
  216. data/spec/turtle/bacon +147 -0
  217. data/spec/turtle/reference +2064 -0
  218. data/spec/turtle/weather +275 -0
  219. data/spec/writer_spec.rb +75 -0
  220. metadata +589 -0
@@ -0,0 +1,112 @@
1
module PubliSci
  module Readers
    # Reads an R object through an R client connection and writes it out as
    # Turtle: one "<base>_structure.ttl" file plus one "<base>_<chrom>.ttl"
    # file of observations per chromosome.
    #
    # The R variable is accessed as var$pheno and var$geno$'<chrom>'
    # (presumably an R/qtl cross object -- confirm against callers).
    class RCross
      include PubliSci::Dataset::DataCube
      include PubliSci::Readers::Output

      # Chromosomes exported when options[:chromosomes] is not supplied.
      DEFAULT_CHROMOSOMES = ((1..19).map(&:to_s) + ["X"]).freeze

      # Writes the structure file and one observation file per chromosome.
      #
      # client       - R connection responding to #eval
      # var          - name of the R variable to export
      # outfile_base - path prefix for every generated file
      # options      - :chromosomes (list of chromosome names, defaults to
      #                DEFAULT_CHROMOSOMES), :quiet (suppress progress output),
      #                :measures (see #measures); also forwarded to the
      #                DataCube helpers
      #
      # Returns one entry per chromosome (the result of Integer#times).
      def generate_n3(client, var, outfile_base, options={})
        meas = measures(client,var,options)
        dim = dimensions(client,var,options)
        codes = codes(client,var,options)

        # File.open rather than Kernel#open: the latter would run a command
        # if the path happened to start with "|".
        File.open("#{outfile_base}_structure.ttl", 'w') do |f|
          f.write structure(client,var,options)
        end

        n_individuals = client.eval("length(#{var}$pheno[[1]])").payload.first
        chromosome_list = (options[:chromosomes] || DEFAULT_CHROMOSOMES).map(&:to_s)

        chromosome_list.map do |chrom|
          # One handle per chromosome instead of reopening for every individual.
          File.open("#{outfile_base}_#{chrom}.ttl", 'w') do |f|
            f.write prefixes(var,options)

            entries_per_individual = client.eval("length(#{var}$geno$'#{chrom}'$map)").to_ruby
            geno_chr = client.eval("#{var}$geno$'#{chrom}'")

            n_individuals.times do |indi|
              obs_data = observation_data(client,var,chrom,indi,geno_chr,entries_per_individual,options)
              labels = labels_for(obs_data,chrom,indi)
              observations(meas,dim,codes,obs_data,labels,var,options).each { |obs| f.write obs }
              puts "(#{chrom}) #{indi}/#{n_individuals}" unless options[:quiet]
            end
          end
        end
      end

      # Builds the Turtle text describing the cube structure: prefixes, data
      # structure definition, dataset, component specifications and measure
      # properties, concatenated in that order.
      def structure(client,var,options={})
        meas = measures(client,var,options)
        dim = dimensions(client,var,options)
        codes = codes(client,var,options)

        str = prefixes(var,options)
        str << data_structure_definition(meas,dim,codes,var,options)
        str << dataset(var,options)
        component_specifications(meas, dim, codes, var, options).each { |c| str << c }
        measure_properties(meas,var,options).each { |m| str << m }

        str
      end

      # Measure names: the phenotype column names (restricted to
      # options[:measures] when given) plus the three genotype measures.
      def measures(client, var, options={})
        pheno_names = client.eval("names(#{var}$pheno)").to_ruby
        pheno_names &= options[:measures] if options[:measures]
        pheno_names | ["genotype","markerpos","marker"]
      end

      # No dimensions are generated for this reader; everything is a measure.
      def dimensions(client, var, options={})
        []
      end

      # No coded properties are generated for this reader.
      def codes(client, var, options={})
        []
      end

      # Labels "<chr>_<n>" for one individual's observations. n continues
      # across individuals: individual 0 gets 1..k, individual 1 gets
      # k+1..2k, where k is the length of the first column in data.
      def labels_for(data,chr,individual,options={})
        per_individual = data.first.last.size
        first_label = per_individual * individual + 1
        last_label = per_individual * (individual + 1)
        (first_label..last_label).map { |n| "#{chr}_#{n}" }
      end

      # Collects the column data for one individual on one chromosome.
      #
      # Returns a Hash of column name => flat Array with the keys "chr",
      # "genotype", "individual", "marker", "markerpos" and one key per
      # phenotype name.
      def observation_data(client, var, chr, row_individ, geno_chr, entries_per_individual, options={})
        data = {}
        %w(chr genotype individual marker markerpos).each { |key| data[key] = [] }

        pheno_names = client.eval("names(#{var}$pheno)").to_ruby
        pheno_names.each { |name| data[name] = [] }

        # The individual index and its phenotype values are repeated once per entry.
        data["individual"] << (1..entries_per_individual).to_a.fill(row_individ)
        pheno_names.each do |name|
          value = client.eval("#{var}$pheno$#{name}").to_ruby[row_individ]
          data[name] << (1..entries_per_individual).to_a.fill(value)
        end

        num_markers = geno_chr.payload.first.to_ruby.column_size
        data["chr"] << (1..num_markers).to_a.fill(chr)
        data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
        data["marker"] << client.eval("names(#{var}$geno$'#{chr}'$map)").payload
        data["markerpos"] << geno_chr.payload["map"].to_a

        # Each column was appended as a nested array; flatten in place.
        data.each_value { |column| column.flatten! }
        data
      end

      # Size of the first phenotype column of var.
      def num_individuals(client, var, options={})
        client.eval("#{var}$pheno").payload.first.to_ruby.size
      end
    end
  end
end
@@ -0,0 +1,176 @@
1
module PubliSci
  module Readers
    # Reads an R matrix (or data frame, with options[:type] == :dataframe)
    # through an R client connection and emits one observation per cell.
    #
    # NOTE: this is pretty much hard-coded for one application right now and
    # doesn't do any dimension or code generation. Since the data is a set of
    # scores indexed by column (probe) and row (marker), the usual data cube
    # generator won't work (I think). Adding an option to specify this kind
    # of dataset would probably be useful in the future.
    class RMatrix
      include PubliSci::Dataset::DataCube

      # Exports var either to files, to stdout, or to a String.
      #
      # client       - R connection responding to #eval
      # var          - name of the R variable to export
      # outfile_base - path prefix; "<base>_structure.ttl" is always written,
      #                "<base>_<n>.ttl" chunks only in file mode
      # options      - :probes_per_file (default 100), :type (:dataframe to
      #                read a data frame), :print (write to stdout),
      #                :output (:string to return the Turtle), :quiet,
      #                :measures (see #measures)
      #
      # Returns the Turtle String when options[:output] == :string, else nil.
      def generate_n3(client, var, outfile_base, options={})
        meas = measures(client,var,options)
        dim = dimensions(client,var,options)
        codes = codes(client,var,options)

        outvar = sanitize([var]).first

        probes_per_file = options[:probes_per_file] || 100
        col_select = options[:type] == :dataframe ? "names" : "colnames"
        to_string = options[:output] == :string
        to_files = !(options[:print] || to_string)

        # File.open rather than Kernel#open: the latter would run a command
        # if the path happened to start with "|".
        File.open("#{outfile_base}_structure.ttl", 'w') do |f|
          f.write structure(client,var,outvar,options)
        end

        probes = client.eval("#{col_select}(#{var})").to_ruby
        if probes.nil?
          # Unnamed columns: number them in R so every probe has an id.
          client.eval("colnames(#{var})=1:ncol(#{var})")
          probes = client.eval("#{col_select}(#{var})").to_ruby
        end
        markers = rows(client,var,options)

        puts prefixes(var,options) if options[:print]
        str = prefixes(var,options) if to_string

        probes.each_with_index do |probe, index|
          # The chunk file is derived from the 0-based index for BOTH the
          # truncating write and the append. Using the 1-based probe number
          # for the append put the last probe of each chunk into the next
          # file, which was then truncated on the following iteration.
          chunk_file = "#{outfile_base}_#{index / probes_per_file}.ttl"
          if to_files && index % probes_per_file == 0
            File.open(chunk_file, 'w') { |f| f.write prefixes(var,options) }
          end

          probe_number = index + 1 # R indexing starts at 1
          obs_data = observation_data(client,var,probe_number,markers,options)
          labels = labels_for(client,var,probe)
          triples = observations(meas,dim,codes,obs_data,labels,outvar,options)

          triples.each { |obs| puts obs } if options[:print]
          triples.each { |obs| str << obs } if to_string

          if to_files
            File.open(chunk_file, 'a') { |f| triples.each { |obs| f.write obs } }
            puts "#{probe_number}/#{probes.size}" unless options[:quiet]
          end
        end

        str if to_string
      end

      # Builds the Turtle text describing the cube structure.
      #
      # NOTE(review): the data structure definition and dataset use the
      # sanitized outvar while prefixes, component specifications and measure
      # properties use the raw var -- kept as found; confirm this is intended.
      def structure(client,var,outvar,options={})
        meas = measures(client,var,options)
        dim = dimensions(client,var,options)
        codes = codes(client,var,options)

        str = prefixes(var, options)
        str << data_structure_definition(meas,[],codes,outvar,options)
        str << dataset(outvar,options)
        component_specifications(meas, dim, codes, var, options).each { |c| str << c }
        measure_properties(meas,var,options).each { |m| str << m }

        str
      end

      # For now everything is a measure: options[:measures] when given,
      # otherwise the default column/row/value names.
      def measures(client, var, options={})
        options[:measures] || ["probe","marker","value"]
      end

      # No dimensions are generated for this reader.
      def dimensions(client, var, options={})
        []
      end

      # No coded properties are generated for this reader.
      def codes(client, var, options={})
        []
      end

      # Labels "<probe_id>_<n>" for n in 1..number of rows of var.
      def labels_for(connection,var,probe_id,options={})
        row_count = rows(connection,var,options).size
        (1..row_count).map { |n| "#{probe_id}_#{n}" }
      end

      # Row names of var, or 1..nrow(var) when R reports none.
      def rows(connection,var,options={})
        row_names = connection.eval("row.names(#{var})")
        # hacky solution because the R client's .to_ruby method doesn't fully
        # work: compare against an evaluated NULL instead
        if row_names == connection.eval('NULL')
          (1..connection.eval("nrow(#{var})").payload.first).to_a
        else
          row_names.payload
        end
      end

      # Collects the observations for one column of var.
      #
      # probe_number - 1-based column index
      # row_names    - labels for the rows, as returned by #rows
      #
      # Returns a Hash with three keys (column, row and value label, taken
      # from options[:measures] positions 0..2 or defaulting to "probe",
      # "marker", "value"), each mapping to a flat Array.
      def observation_data(client, var, probe_number, row_names, options={})
        chosen = options[:measures] || []
        col_label = (chosen[0] || "probe").to_s
        row_label = (chosen[1] || "marker").to_s
        val_label = (chosen[2] || "value").to_s

        data = {}
        data[col_label] = []
        data[row_label] = []
        data[val_label] = []

        dataframe = options[:type] == :dataframe
        col_select = dataframe ? "names" : "colnames"
        column_expr = dataframe ? "#{var}[[#{probe_number}]]" : "#{var}[,#{probe_number}]"

        probe_obj = client.eval(column_expr).to_ruby
        probe_id = client.eval("#{col_select}(#{var})[[#{probe_number}]]").to_ruby

        # The probe id is repeated once per value in the column.
        data[col_label] = (1..(probe_obj.size)).to_a.fill(probe_id)
        probe_obj.each_with_index do |lod, i|
          data[row_label] << row_names[i]
          data[val_label] << lod
        end

        data.each_value { |column| column.flatten! }
        data
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
module PubliSci
  # Handles connection and messaging to/from the triple store.
  #
  # Two store types are supported through options[:type]:
  #   :graph     - an in-memory RDF::Repository supplied to #add
  #   :fourstore - an HTTP endpoint at options[:url], loaded through curl
  class Store
    include PubliSci::Query

    # Options used when the caller does not override them.
    def defaults
      {
        type: :fourstore,
        url: "http://localhost:8080", #TODO port etc should eventually be extracted from URI if given
        replace: false
      }
    end

    # options - Hash merged over #defaults (:type, :url, :replace)
    def initialize(options={})
      @options = defaults.merge(options)
    end

    # Loads one file into the store.
    #
    # file  - path of the file to load
    # graph - for :graph stores, the RDF::Repository to load into; for
    #         :fourstore, the graph name appended to the fixed
    #         http://rqtl.org/ namespace
    #
    # Returns the repository (:graph), curl's standard output (:fourstore),
    # or nil for any other store type.
    # Raises ArgumentError when a :graph store is not given an RDF::Repository.
    def add(file,graph)
      case @options[:type]
      when :graph
        # raise, not throw: throw is for catch/throw control flow and only
        # failed here by accident.
        raise ArgumentError, "please provide an RDF::Repository" unless graph.is_a? RDF::Repository
        graph.load(file)
        @store = graph
      when :fourstore
        graph_uri = "http%3A%2F%2Frqtl.org%2F#{graph}"
        command =
          if @options[:replace]
            ['curl', '-T', file.to_s,
             '-H', 'Content-Type: application/x-turtle',
             "#{@options[:url]}/data/#{graph_uri}"]
          else
            ['curl', '--data-urlencode', "data@#{file}",
             '-d', "graph=#{graph_uri}",
             '-d', 'mime-type=application/x-turtle',
             "#{@options[:url]}/data/"]
          end
        # Array form runs curl directly, without a shell, so paths or graph
        # names containing spaces or shell metacharacters are passed through
        # verbatim instead of being interpreted.
        IO.popen(command) { |io| io.read }
      end
    end

    # Loads every regular file in dir into the store, printing progress and
    # the result of each #add.
    #
    # pattern - nil for all files, a Regexp matched against file names, or
    #           :turtle / :ttl for names ending in ".ttl"
    #
    # Returns the Array of file names (relative to dir) that were loaded.
    def add_all(dir, graph, pattern=nil)
      pattern = /.+\.ttl\z/ if pattern == :turtle || pattern == :ttl

      files = Dir.entries(dir).select { |entry| File.file?(File.join(dir, entry)) }
      files = files.grep(pattern) if pattern.is_a? Regexp
      total = files.size

      files.each_with_index do |file, index|
        puts file + " #{index + 1}/#{total} files"
        # Dir.entries yields bare names; join with dir so the load works
        # regardless of the current working directory.
        puts add(File.join(dir, file), graph)
      end
    end

    # Runs a query string against the configured store.
    # Returns nil for an unrecognised store type.
    def query(string)
      case @options[:type]
      when :graph
        execute(string, @store, :graph)
      when :fourstore
        execute(string, @options[:url], :fourstore)
      end
    end

    # The configured endpoint URL.
    def url
      @options[:url]
    end
  end
end
@@ -0,0 +1,91 @@
1
module PubliSci
  module Writers
    # Serialises a data cube, read from a Turtle file or an RDF repository,
    # as ARFF text.
    class ARFF < Base
      # Assembles the ARFF document.
      #
      # relation   - relation name, also used (capitalized) in the title
      # attributes - Hash of attribute name => ARFF type; written sorted by name
      # data       - enumerable of [key, Hash] pairs; each Hash is one row and
      #              is written with its values ordered by sorted key
      # source     - description of the data's origin for the header comment
      #
      # Returns the ARFF text as a String.
      def build_arff(relation, attributes, data, source)
        str = <<-EOS
% 1. Title: #{relation.capitalize} Database
%
% 2. Sources:
% (a) Generated from RDF source #{source}
%
@RELATION #{relation}

        EOS

        attributes.sort.each do |attribute, type|
          str << "@ATTRIBUTE #{attribute} #{type}\n"
        end

        str << "\n@DATA\n"
        data.each { |d| str << d[1].sort.map { |pair| pair.last }.join(',') + "\n" }

        str
      end

      # Converts a Turtle file to ARFF, using the file's dataset label as the
      # relation name.
      #
      # verbose - print load progress when true
      def from_turtle(turtle_file, verbose=false)
        puts "loading #{turtle_file}" if verbose
        repo = RDF::Repository.load(turtle_file)
        puts "loaded #{repo.size} statements into temporary repo" if verbose

        dims = dimensions(repo)
        meas = measures(repo)
        data = observations(repo)

        relation = dataSet(repo)
        codes = codes(repo)

        attributes = arff_attribute_types(dims, meas, data, codes)

        build_arff(relation, attributes, data, turtle_file)
      end

      # Converts a dataset held in repo to ARFF.
      #
      # dataset - dataset to select; when nil the queries run unrestricted
      #           and the repository's dataset label is used for naming
      # title   - relation name; defaults to the dataset
      # verbose - accepted for symmetry with #from_turtle; currently unused
      def from_store(repo, dataset=nil, title=nil, verbose=false)
        dims = dimensions(repo,dataset)
        meas = measures(repo,dataset)
        data = observations(repo,dataset)
        codes = codes(repo,dataset)

        attributes = arff_attribute_types(dims, meas, data, codes)

        dataset = dataSet(repo) unless dataset
        title = dataset unless title
        build_arff(title,attributes,data,dataset)
      end

      private

      # Maps every dimension and measure to an ARFF attribute type based on
      # the range recommended for its observed values.
      #
      # Coded dimensions become a nominal "{a, b, ...}" list; coded measures
      # and any range not mapped here become "string", so that every
      # @ATTRIBUTE line carries a type.
      def arff_attribute_types(dims, meas, data, codes)
        (dims | meas).each_with_object({}) do |component, types|
          types[component] =
            case recommend_range(data.map { |o| o[1][component] })
            when "xsd:int"
              "integer"
            when "xsd:double"
              "real"
            when :coded
              if dims.include? component
                "{#{codes[component].join(', ')}}"
              else
                "string"
              end
            else
              "string"
            end
        end
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
module PubliSci
  module Writers
    # Shared query helpers for the writers: each public method accepts either
    # an RDF::Repository or the path of a file to load one from.
    class Base
      include PubliSci::Query
      include PubliSci::Parser
      include PubliSci::Analyzer

      # Normalises input to an RDF::Repository.
      #
      # input - an RDF::Repository (returned as is) or a String naming an
      #         existing file (loaded into a new repository)
      #
      # Raises RuntimeError for a String that is not an existing file and for
      # any other kind of input.
      def handle_input(input)
        case input
        when String
          raise "UnkownStringInput: #{input}" unless File.exist? input
          RDF::Repository.load(input)
        when RDF::Repository
          input
        else
          raise "UnkownInput: #{input}, #{input.class}"
        end
      end

      # Dimension names from dimensions.rq, optionally restricted to data_set.
      # select picks which binding of each result is returned (as a String).
      def dimensions(input, data_set=nil, select=:label)
        run_cube_query("dimensions.rq", handle_input(input), data_set).to_h.map { |d| d[select].to_s }
      end

      # Measure names from measures.rq, optionally restricted to data_set.
      # select picks which binding of each result is returned (as a String).
      def measures(input, data_set=nil, select=:label)
        run_cube_query("measures.rq", handle_input(input), data_set).to_h.map { |m| m[select].to_s }
      end

      # Observations from observations.rq, optionally restricted to data_set,
      # converted with observation_hash; shorten_url is forwarded to it.
      def observations(input, data_set = nil, shorten_url = true)
        obs = run_cube_query("observations.rq", handle_input(input), data_set)
        observation_hash(obs,shorten_url)
      end

      # The select binding of the first result of dataset.rq, as a String.
      def dataSet(input, select = :label)
        repo = handle_input(input)

        execute_from_file("dataset.rq",repo,:graph).to_h.first[select].to_s
      end

      # Results of codes.rq grouped into a Hash: the first value of each
      # result is the key, the last value is appended to that key's Array.
      #
      # NOTE(review): select is accepted but not used here -- kept for
      # signature compatibility.
      def codes(input, data_set = nil, select = :label)
        results = run_cube_query("codes.rq", handle_input(input), data_set).to_h

        results.each_with_object({}) do |result, grouped|
          values = result.values.map(&:to_s)
          (grouped[values.first] ||= []) << values.last
        end
      end

      # Loads turtle_file and returns its contents as described in #repo_to_ruby.
      def turtle_to_ruby(turtle_file, select_dataset=nil, shorten_url=true)
        repo = RDF::Repository.load(turtle_file)

        repo_to_ruby(repo,select_dataset,shorten_url)
      end

      # Returns a Hash with :measures, :dimensions, :coded_dimensions and
      # :data for select_dataset (or, when nil, for the dataset reported by
      # dataset.rq).
      def repo_to_ruby(repo,select_dataset=nil, shorten_url=true)
        # Resolve the input once; every query below would otherwise reload
        # the file when a path is passed.
        repo = handle_input(repo)

        select_dataset = dataSet(repo,:dataset) unless select_dataset
        dims = dimensions(repo,select_dataset)
        meas = measures(repo,select_dataset)
        codes = codes(repo,select_dataset)
        data = observations(repo,select_dataset,shorten_url)
        {measures: meas, dimensions: dims, coded_dimensions: codes, data: data}
      end

      private

      # Runs a stored query against repo, substituting ?dataSet with the
      # given dataset URI when one is supplied.
      def run_cube_query(query_file, repo, data_set)
        if data_set
          execute_from_file(query_file,repo,:graph,{"?dataSet"=>"<#{data_set}>"})
        else
          execute_from_file(query_file,repo,:graph)
        end
      end
    end
  end
end