bio-publisci 0.0.8 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +10 -0
  3. data/Rakefile +1 -1
  4. data/bin/bio-publisci-server +50 -0
  5. data/features/reader_steps.rb +1 -1
  6. data/lib/bio-publisci.rb +11 -2
  7. data/lib/bio-publisci/datacube_model.rb +92 -88
  8. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +0 -1
  9. data/lib/bio-publisci/dataset/data_cube.rb +78 -44
  10. data/lib/bio-publisci/dataset/dataset_for.rb +26 -27
  11. data/lib/bio-publisci/metadata/metadata_model.rb +21 -23
  12. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +5 -5
  13. data/lib/bio-publisci/output.rb +1 -1
  14. data/lib/bio-publisci/parser.rb +130 -12
  15. data/lib/bio-publisci/post_processor.rb +95 -0
  16. data/lib/bio-publisci/query/query_helper.rb +13 -8
  17. data/lib/bio-publisci/readers/arff.rb +1 -1
  18. data/lib/bio-publisci/readers/base.rb +57 -0
  19. data/lib/bio-publisci/readers/csv.rb +2 -5
  20. data/lib/bio-publisci/readers/dataframe.rb +2 -2
  21. data/lib/bio-publisci/readers/maf.rb +199 -0
  22. data/lib/bio-publisci/readers/r_cross.rb +6 -10
  23. data/lib/bio-publisci/readers/r_matrix.rb +1 -1
  24. data/lib/bio-publisci/writers/base.rb +16 -0
  25. data/lib/bio-publisci/writers/json.rb +18 -0
  26. data/resources/maf_example.maf +10 -0
  27. data/resources/maf_rdf.ttl +1173 -0
  28. data/resources/primer.ttl +38 -0
  29. data/resources/queries/gene.rq +16 -0
  30. data/resources/queries/hugo_to_ensembl.rq +7 -0
  31. data/resources/queries/maf_column.rq +26 -0
  32. data/resources/queries/patient.rq +11 -0
  33. data/resources/queries/patient_list.rq +11 -0
  34. data/resources/queries/patients_with_mutation.rq +18 -0
  35. data/scripts/get_gene_lengths.rb +50 -0
  36. data/scripts/islet_mlratio.rb +1 -1
  37. data/scripts/scan_islet.rb +1 -1
  38. data/scripts/update_reference.rb +8 -3
  39. data/server/helpers.rb +215 -0
  40. data/server/public/src-min-noconflict/LICENSE +24 -0
  41. data/server/public/src-min-noconflict/ace.js +11 -0
  42. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  43. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  44. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  45. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  46. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  47. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  48. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  49. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  50. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  51. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  52. data/server/public/src-min-noconflict/ext-split.js +1 -0
  53. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  54. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  55. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  56. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  57. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  58. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  59. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  60. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  61. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  62. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  63. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  64. data/server/public/src-min-noconflict/worker-css.js +1 -0
  65. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  66. data/server/public/src-min-noconflict/worker-json.js +1 -0
  67. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  68. data/server/public/src-min-noconflict/worker-php.js +1 -0
  69. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  70. data/server/routes.rb +123 -0
  71. data/server/views/dsl.haml +65 -0
  72. data/server/views/dump.haml +3 -0
  73. data/server/views/import.haml +35 -0
  74. data/server/views/new_repository.haml +25 -0
  75. data/server/views/query.haml +28 -0
  76. data/server/views/repository.haml +25 -0
  77. data/spec/ORM/data_cube_orm_spec.rb +1 -0
  78. data/spec/bnode_spec.rb +66 -0
  79. data/spec/data_cube_spec.rb +66 -63
  80. data/spec/dataset_for_spec.rb +36 -16
  81. data/spec/dsl_spec.rb +41 -0
  82. data/spec/generators/csv_spec.rb +3 -3
  83. data/spec/generators/dataframe_spec.rb +2 -2
  84. data/spec/generators/maf_spec.rb +40 -0
  85. data/spec/generators/r_cross_spec.rb +2 -2
  86. data/spec/generators/r_matrix_spec.rb +2 -2
  87. data/spec/length_lookup_spec.rb +0 -0
  88. data/spec/maf_query_spec.rb +343 -0
  89. data/spec/resource/example.Rhistory +1 -1
  90. data/spec/turtle/bacon +9 -9
  91. data/spec/turtle/reference +43 -43
  92. data/spec/turtle/weather +10 -10
  93. data/spec/writer_spec.rb +16 -2
  94. metadata +212 -61
@@ -3,8 +3,15 @@ module PubliSci
3
3
  class Dataset
4
4
  extend PubliSci::Interactive
5
5
 
6
- def self.for(object, options={}, ask_on_ambiguous=true)
6
# Lazily created lookup table of registered readers.
#
# The table is kept in a class-level instance variable, so repeated calls
# return the very same Hash object. Keys and values are whatever callers
# passed to register_reader.
#
# @return [Hash] the registry (created empty on first access)
def self.reader_registry
  @reader_registry = {} unless @reader_registry
  @reader_registry
end
7
9
 
10
# Associates a reader class with a file extension in the reader registry.
# Registering the same extension again replaces the earlier entry.
#
# @param extension the registry key (compared against a file extension in Dataset.for)
# @param klass the object stored for that key; Dataset.for calls `.new.automatic` on it
# @return klass, the value that was stored
def self.register_reader(extension,klass)
  reader_registry.store(extension, klass)
end
13
+
14
+ def self.for(object, options={}, ask_on_ambiguous=true)
8
15
  if options == false || options == true
9
16
  ask_on_ambiguous = options
10
17
  options = {}
@@ -20,13 +27,20 @@ module PubliSci
20
27
  raise "Can't load file #{object}; file type inference not yet implemented"
21
28
  end
22
29
 
23
- case extension
24
- when ".RData"
25
- r_object(object, options, ask_on_ambiguous)
26
- when /.csv/i
27
- PubliSci::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
30
+ if reader_registry.keys.include? extension
31
+ reader_registry[extension].new.automatic(object,options,ask_on_ambiguous)
28
32
  else
29
- false
33
+ case extension
34
+ when ".RData"
35
+ r_object(object, options, ask_on_ambiguous)
36
+ when /.csv/i
37
+ PubliSci::Readers::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
38
+ when /.arff/i
39
+ PubliSci::Readers::ARFF.new.generate_n3(object)
40
+ else
41
+ # false
42
+ raise "Unkown Extension #{extension}"
43
+ end
30
44
  end
31
45
  elsif object =~ %r{htt(p|ps)://.+}
32
46
  self.for(download(object).path, options, ask_on_ambiguous) || RDF::Statement.new(RDF::URI(object), RDF::URI('http://semanticscience.org/resource/hasValue'), IO.read(download(object).path)).to_s
@@ -44,12 +58,6 @@ module PubliSci
44
58
  end
45
59
  end
46
60
 
47
- # def for_remote
48
- # addr = object
49
- # tmp = download(object)
50
- # self.for(tmp.path) || "#{addr} <http://semanticscience.org/resource/"
51
- # end
52
-
53
61
  def self.download(uri)
54
62
  out = Tempfile.new(uri.split('/').last)
55
63
  out.write open(uri).read
@@ -57,15 +65,6 @@ module PubliSci
57
65
  out
58
66
  end
59
67
 
60
- # private
61
- # def self.reader_exists?(object)
62
- # if object.is_a? String
63
- # if File.exist? object
64
-
65
- # elsif
66
- # end
67
- # end
68
-
69
68
  def self.r_object(object, options={}, ask_on_ambiguous=true)
70
69
  if object.is_a? String
71
70
  con = Rserve::Connection.new
@@ -80,7 +79,7 @@ module PubliSci
80
79
  r_classes = con.eval("class(#{var})").to_ruby
81
80
 
82
81
  if r_classes.include? "data.frame"
83
- df = PubliSci::Reader::Dataframe.new
82
+ df = PubliSci::Readers::Dataframe.new
84
83
  unless options[:dimensions] || !ask_on_ambiguous
85
84
  dims = con.eval("names(#{var})").to_ruby
86
85
  puts "Which dimensions? #{dims}"
@@ -101,7 +100,7 @@ module PubliSci
101
100
  df.generate_n3(con.eval(var),var,options)
102
101
 
103
102
  elsif r_classes.include? "cross"
104
- bc = PubliSci::Reader::RCross.new
103
+ bc = PubliSci::Readers::RCross.new
105
104
 
106
105
  unless options[:measures] || !ask_on_ambiguous
107
106
  pheno_names = con.eval("names(#{var}$pheno)").to_ruby
@@ -122,7 +121,7 @@ module PubliSci
122
121
  bc.generate_n3(con, var, base, options)
123
122
 
124
123
  elsif r_classes.include? "matrix"
125
- mat = PubliSci::Reader::RMatrix.new
124
+ mat = PubliSci::Readers::RMatrix.new
126
125
 
127
126
  unless options[:measures] || !ask_on_ambiguous
128
127
  puts "Row label"
@@ -149,13 +148,13 @@ module PubliSci
149
148
 
150
149
  mat.generate_n3(con, var, base, options)
151
150
  else
152
- raise "no PubliSci::Reader found for #{r_classes}"
151
+ raise "no PubliSci::Readers found for #{r_classes}"
153
152
  end
154
153
 
155
154
  elsif object.is_a? Rserve::REXP
156
155
  if object.attr.payload["class"].payload.first
157
156
 
158
- df = PubliSci::Reader::Dataframe.new
157
+ df = PubliSci::Readers::Dataframe.new
159
158
 
160
159
  var = nil
161
160
 
@@ -1,27 +1,25 @@
1
- module PubliSci
2
- class Metadata
3
- module Model
4
- PROV ||= RDF::Vocabulary.new(RDF::URI.new('http://www.w3.org/ns/prov#'))
5
- QB ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/linked-data/cube#'))
6
- DCT ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/dc/terms/'))
7
- # dct:title "#{fields[:title]}";
8
- # dct:creator "#{fields[:creator]}";
9
- # rdfs:comment "#{fields[:description]}";
10
- # dct:description "#{fields[:description]}";
11
- # dct:issued "#{fields[:date]}"^^xsd:date.
1
+ begin
2
+ require 'spira'
3
+ module PubliSci
4
+ class Metadata
5
+ module Model
6
+ PROV ||= RDF::Vocabulary.new(RDF::URI.new('http://www.w3.org/ns/prov#'))
7
+ QB ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/linked-data/cube#'))
8
+ DCT ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/dc/terms/'))
12
9
 
13
- class Meta < Spira::Base
14
- type PROV.Entity
15
- type QB.DataSet
16
- property :label, predicate: RDF::RDFS.label
17
- property :comment, predicate: RDF::RDFS.comment
18
- property :description, predicate: DCT.description
19
- property :creator, predicate: DCT.creator
20
- property :issued, predicate: DCT.issued
10
+
11
+ class Meta < Spira::Base
12
+ type PROV.Entity
13
+ type QB.DataSet
14
+ property :label, predicate: RDF::RDFS.label
15
+ property :comment, predicate: RDF::RDFS.comment
16
+ property :description, predicate: DCT.description
17
+ property :creator, predicate: DCT.creator
18
+ property :issued, predicate: DCT.issued
19
+ end
21
20
  end
22
21
  end
23
22
  end
24
- end
25
- # rescue LoadError
26
- # puts "spira not installed, ORM unavailable"
27
- # end
23
+ rescue LoadError
24
+ # puts "spira not installed, ORM unavailable"
25
+ end
@@ -1,7 +1,7 @@
1
1
  require 'rdf/4store'
2
2
 
3
- # begin
4
- # require 'spira'
3
+ begin
4
+ require 'spira'
5
5
 
6
6
  module PubliSci
7
7
  class Prov
@@ -104,6 +104,6 @@ module PubliSci
104
104
  end
105
105
  end
106
106
  end
107
- # rescue LoadError
108
- # puts "spira not installed, ORM unavailable"
109
- # end
107
+ rescue LoadError
108
+ # puts "spira not installed, ORM unavailable"
109
+ end
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Reader
2
+ module Readers
3
3
  module Output
4
4
  def output(string, options={},append=false)
5
5
  options[:type] = [:string] unless options[:type]
@@ -1,8 +1,8 @@
1
1
  module PubliSci
2
2
  module Parser
3
3
 
4
- def is_uri?(string)
5
- RDF::Resource(string).valid?
4
# Reports whether obj is accepted by RDF.rb as a valid resource.
#
# @param obj the value to check; it is handed straight to RDF::Resource()
# @return the result of calling valid? on the coerced resource
def is_uri?(obj)
  resource = RDF::Resource(obj)
  resource.valid?
end
7
7
 
8
8
  def sanitize(array)
@@ -14,7 +14,7 @@ module PubliSci
14
14
  if is_uri? entry
15
15
  processed << entry.gsub(/[\s]/,'_')
16
16
  else
17
- processed << entry.gsub(/[\s\.]/,'_')
17
+ processed << entry.gsub(/[\s]/,'_')
18
18
  end
19
19
  else
20
20
  processed << entry
@@ -95,18 +95,24 @@ module PubliSci
95
95
  end
96
96
  end
97
97
 
98
- def to_resource(obj, options)
98
+ def to_resource(obj, options={})
99
99
  if obj.is_a? String
100
- obj = "<#{obj}>" if is_uri? obj
101
100
 
102
- #TODO decide the right way to handle missing values, since RDF has no null
103
- #probably throw an error here since a missing resource is a bigger problem
104
- obj = "NA" if obj.empty?
101
+ if is_uri? obj
102
+ obj = RDF::Resource(obj).to_base unless obj[/\w+:\w/]
103
+ else
104
+
105
+ #TODO decide the right way to handle missing values, since RDF has no null
106
+ #probably throw an error here since a missing resource is a bigger problem
107
+ obj = "rdf:nil" if obj.empty?
108
+ obj= obj.to_s.gsub(' ','_')
109
+ end
105
110
 
111
+ obj
106
112
  #TODO remove special characters (faster) as well (eg '?')
107
- obj.gsub(' ','_').gsub('?','')
113
+
108
114
  elsif obj == nil && options[:encode_nulls]
109
- '"NA"'
115
+ 'rdf:nil'
110
116
  elsif obj.is_a? Numeric
111
117
  #resources cannot be referred to purely by integer (?)
112
118
  "n"+obj.to_s
@@ -115,7 +121,7 @@ module PubliSci
115
121
  end
116
122
  end
117
123
 
118
- def to_literal(obj, options)
124
+ def to_literal(obj, options={})
119
125
  if obj.is_a? String
120
126
  # Depressing that there's no more elegant way to check if a string is
121
127
  # a number...
@@ -128,12 +134,124 @@ module PubliSci
128
134
  end
129
135
  elsif obj == nil && options[:encode_nulls]
130
136
  #TODO decide the right way to handle missing values, since RDF has no null
131
- '"NA"'
137
+ 'rdf:nil'
138
+ else
139
+ obj
140
+ end
141
+ end
142
+
143
# A value counts as "complex" (structured) exactly when it is an Array.
#
# @param obj any object
# @return [Boolean] true for Arrays (and Array subclasses), false otherwise
def is_complex?(obj)
  obj.kind_of?(Array)
end
146
+
147
# Builds the identifier of a (nested) blank node.
#
# When str already contains "node", its final character is dropped and
# "/<n>>" is appended, e.g. "<node/1>" + 2 => "<node/1/2>". Any other
# string (including the default empty string) starts a fresh "<node/n>".
#
# @param n index or identifier of the node; must be truthy
# @param str [String] identifier of the parent node, or a blank string
# @return [String] the new node identifier
# @raise [RuntimeError] if n is nil/false or str is not a String
def add_node(n,str="")
  raise "need index or identifier to generate blank nodes" unless n
  raise "need base string or blank string for blank node" unless str.is_a? String

  # No existing node path to extend: start a new top-level node.
  return "<node/#{n}>" unless str["node"]

  # Strip the trailing character (the closing ">") and extend the path.
  "#{str[0..-2]}/#{n}>"
end
159
+
160
# Encodes a single value for output, dispatching on its shape.
#
# * anything RDF::Resource() reports as valid is passed to to_resource
# * a String already wrapped in angle brackets ("<...>") is returned as is
# * an Array is treated as a structured value: a node identifier is built
#   with add_node and returned together with the output of bnode_value
# * everything else is passed to to_literal
#
# @param obj the value to encode
# @param options [Hash] forwarded to to_resource / to_literal / bnode_value
# @param node_index index used when a blank node has to be created
#   (add_node raises if this is nil and obj is an Array)
# @param node_str [String] identifier of the enclosing node, if any
# @return the encoded value; for Arrays a two-element Array
#   [node identifier, bnode_value result]
def encode_value(obj,options={}, node_index=nil, node_str = "")
  if RDF::Resource(obj).valid?
    to_resource(obj,options)
  elsif obj.is_a?(String) && obj[0] == "<" && obj[-1] == ">"
    # Compare the last character with ==. The previous `obj[-1] = ">"`
    # was an assignment: it overwrote the caller's string, was always
    # truthy, and raised on frozen strings.
    obj
  elsif obj.is_a?(Array)
    node_str = add_node(node_index,node_str)
    ["#{node_str}", bnode_value(obj, node_index, node_str, options)]
  else
    to_literal(obj,options)
  end
end
136
172
 
173
# Serialises a structured (Array) value into statement text for a blank node.
#
# Handles three shapes of obj:
#   * [String, value]  - a predicate/object pair
#   * [Array, Array]   - two nested structured values
#   * an Array (size != 2) whose elements are all Arrays - each element is
#     processed recursively
# Any other Array leaves the accumulated string empty.
#
# Returns the accumulated String when no sub-nodes were collected, otherwise
# a flat Array whose first element is that String followed by the flattened
# sub-node entries. Raises RuntimeError ("Invalid Structured value") when obj
# is not an Array.
+ def bnode_value(obj, node_index, node_str, options)
174
+ # TODO - Implement proper recursion
175
+ # TODO - check if object is "a" (rdf:type) => or convert rdf:type to "a"
176
+ str = ""
177
+ subnodes = []
178
+ if obj.is_a?(Array) # && obj.size == 2
179
+ if obj.size == 2
180
+ if obj[0].is_a?(String)
181
+ if is_complex?(obj[1])
182
# Nested value: point the predicate at a node identifier and queue the
# encoded nested value as a sub-node.
+ str << "#{to_resource(obj[0])} #{add_node(node_index,node_str)} . \n"
183
+ subnodes << encode_value(obj[1], options, node_index, node_str)
184
+ else
185
# Simple value: emit "predicate object " inline.
+ str << "#{to_resource(obj[0])} #{encode_value(obj[1], options, node_index, node_str)} "
186
+ end
187
+ elsif obj[0].is_a?(Array) && obj[1].is_a?(Array)
188
# NOTE(review): newnode is never read afterwards; the call still validates
# node_str through add_node - confirm whether the result was meant to be used.
+ newnode = add_node(0,node_str)
189
+ v1 = bnode_value(obj[0], 0, node_str, options)
190
+ v2 = bnode_value(obj[1], 1, node_str, options)
191
+
192
# An Array result means the recursive call produced sub-nodes; it is moved
# wholesale into subnodes and not written inline.
+ if v1.is_a? Array
193
+ subnodes << v1
194
+ v1 = nil
195
+ end
196
+
197
+ if v2.is_a? Array
198
+ subnodes << v2
199
+ v2 = nil
200
+ end
201
+
202
+ if v1
203
+ str << "#{v1} ;"
204
+ end
205
+
206
+ str << "\n#{v2} .\n" if v2
207
+ end
208
+ elsif obj.all?{|ent| ent.is_a? Array}
209
+ obj.each{|ent|
210
+ bn = bnode_value(ent,node_index,node_str,options)
211
+ if bn.is_a? String
212
+ str << bn + "\n"
213
+ else
214
# NOTE(review): bn is a flat Array [str, sub1, sub2, ...]; only bn[1] is
# kept here, so any further entries are dropped - confirm this is intended.
+ str << bn[0] + "\n"
215
+ subnodes << bn[1]
216
+ end
217
+ }
218
+ end
219
+ else
220
+ raise "Invalid Structured value: #{obj}"
221
+ end
222
+
223
# Return shape depends on whether any sub-nodes were collected.
+ if subnodes.size > 0
224
+ [str, subnodes.flatten].flatten
225
+ else
226
+ str
227
+ end
228
+ end
229
+
230
# Re-indents Turtle text line by line based on each line's final character.
#
# Every line is prefixed with one space per current indent level; the level
# used for the NEXT line is then derived from how the current line ends:
#   "."  -> reset to 0 (statement finished)
#   ";"  -> at least 1 (unchanged when already indented)
#   else -> reset to 0 for lines shorter than two characters, otherwise
#           one level deeper
#
# @param turtle_str [String] newline-separated Turtle text
# @return [String] the indented lines joined with "\n"
def turtle_indent(turtle_str)
  depth = 0
  indented = []

  turtle_str.split("\n").each do |line|
    indented << (" " * depth) + line

    depth =
      if line.end_with?(".")
        0
      elsif line.end_with?(";")
        depth == 0 ? 1 : depth
      elsif line.size < 2
        0
      else
        depth + 1
      end
  end

  indented.join("\n")
end
254
+
137
255
  def strip_uri(uri)
138
256
  uri = uri.to_s.dup
139
257
  uri[-1] = '' if uri[-1] == '>'
@@ -0,0 +1,95 @@
1
+ module PubliSci
2
+
3
# Helpers for talking to a SADI service: posting N3/Turtle input and
# polling for results the service has not finished yet.
class SADI_request
  # Posts turtle to service and loads the response into a repository.
  #
  # @param service the service URL handed to RestClient.post
  # @param turtle the request body, sent as text/rdf+n3
  # @return the RDF::Repository after the response was appended to it
  def self.send_request(service, turtle)
    response = RestClient.post(service, turtle, content_type: 'text/rdf+n3', accept: 'text/rdf+n3')
    RDF::Repository.new << RDF::Turtle::Reader.new(response)
  end

  # Sends a request and then polls every rdfs:isDefinedBy target whose URL
  # contains "?poll=" until each one has produced a result.
  #
  # A URL is removed from the pending set once its result arrives. The
  # previous version left finished URLs in place, so the same URL was
  # re-polled and its result duplicated while other URLs were never polled.
  #
  # @param service the service URL
  # @param turtle the request body
  # @return [Array] one entry per poll URL, each the value returned by poll
  def self.fetch_async(service,turtle)
    gr = send_request(service,turtle)

    rdfs = RDF::Vocabulary.new("http://www.w3.org/2000/01/rdf-schema#")
    polls = RDF::Query.execute(gr) do
      pattern [:obj, rdfs.isDefinedBy, :def]
    end

    # poll URL => earliest time at which it should be polled next
    pending = {}
    polls.map(&:def).select { |res| res.to_s["?poll="] }.each do |poll_url|
      pending[poll_url.to_s] = Time.now
    end

    results = []
    until pending.empty?
      poll_url, ready_at = pending.min_by { |_url, time| time }
      now = Time.now

      if ready_at > now
        puts "no poll urls ready, sleeping #{ready_at - now}"
        sleep ready_at - now
      end

      result = poll(poll_url)
      # Integer replaces Fixnum, which was removed in Ruby 3.2.
      if result.is_a? Integer
        puts "#{poll_url} Response not ready, waiting #{result}"
        pending[poll_url] = Time.now + result
      else
        results << result
        pending.delete(poll_url)
      end
    end

    results
  end

  # Performs one GET on a poll URL.
  #
  # @param url the poll URL
  # @return [Integer] the Retry-After header value (via to_i) when the
  #   response is a 301/302/307 carrying that header
  # @return the response body otherwise (redirects without Retry-After
  #   are followed)
  def self.poll(url)
    resp = RestClient.get(url, accept: 'text/rdf+n3'){ |response, request, result, &block|
      if [301, 302, 307].include? response.code
        wait = response.headers[:retry_after]
        if wait
          # Non-local return: hands the wait time straight back to the caller.
          return wait.to_i
        else
          response.follow_redirection(request, result, &block)
        end
      else
        response.return!(request, result, &block)
      end
    }
    resp.body
  end

  # Polls poll_url once.
  #
  # @param poll_url the poll URL
  # @return [Integer] the wait time when the response is not ready
  # @return the RDF::Repository with the parsed response otherwise
  def self.try_fetch(poll_url)
    puts "polling #{poll_url}"
    result = poll(poll_url)
    return result if result.is_a? Integer

    RDF::Repository.new << RDF::Turtle::Reader.new(result)
  end
end
72
+
73
# Rewrites a text file by passing every match of a pattern through a block.
class PostProcessor
  # Copies infile to outfile, replacing each match of pattern with the
  # block's return value for that match's first capture group.
  #
  # Changes from the earlier implementation:
  # * the temp file is flushed before it is copied, so buffered lines are
  #   no longer missing from outfile
  # * the input handle and the temp file are closed (the temp file is also
  #   removed) when processing ends, even on error
  # * every match is replaced exactly once; the earlier scan + sub! loop
  #   re-matched text it had just inserted when the replacement itself
  #   matched the pattern
  # * the block's result is inserted verbatim, so backslash sequences such
  #   as "\\1" in it are no longer expanded
  #
  # @param infile path (anything Kernel#open accepts) of the file to read
  # @param outfile path the processed text is copied to
  # @param pattern [Regexp] pattern with at least one capture group
  # @yield [String, nil] the first capture group of each match
  # @yieldreturn [String] the replacement text for that match
  # @return outfile
  # @raise [LocalJumpError] if a line matches and no block was given
  def self.process(infile,outfile,pattern)
    tmp = Tempfile.new('annot_temp')
    begin
      open(infile) do |input|
        input.each_line do |line|
          tmp.write(line.gsub(pattern) { yield(Regexp.last_match(1)) })
        end
      end

      # Push buffered writes to disk before copying by path.
      tmp.flush
      FileUtils.copy(tmp.path,outfile)
    ensure
      tmp.close
      tmp.unlink
    end

    outfile
  end
end
95
+ end