bio-publisci 0.0.8 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +10 -0
- data/Rakefile +1 -1
- data/bin/bio-publisci-server +50 -0
- data/features/reader_steps.rb +1 -1
- data/lib/bio-publisci.rb +11 -2
- data/lib/bio-publisci/datacube_model.rb +92 -88
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +0 -1
- data/lib/bio-publisci/dataset/data_cube.rb +78 -44
- data/lib/bio-publisci/dataset/dataset_for.rb +26 -27
- data/lib/bio-publisci/metadata/metadata_model.rb +21 -23
- data/lib/bio-publisci/metadata/prov/model/prov_models.rb +5 -5
- data/lib/bio-publisci/output.rb +1 -1
- data/lib/bio-publisci/parser.rb +130 -12
- data/lib/bio-publisci/post_processor.rb +95 -0
- data/lib/bio-publisci/query/query_helper.rb +13 -8
- data/lib/bio-publisci/readers/arff.rb +1 -1
- data/lib/bio-publisci/readers/base.rb +57 -0
- data/lib/bio-publisci/readers/csv.rb +2 -5
- data/lib/bio-publisci/readers/dataframe.rb +2 -2
- data/lib/bio-publisci/readers/maf.rb +199 -0
- data/lib/bio-publisci/readers/r_cross.rb +6 -10
- data/lib/bio-publisci/readers/r_matrix.rb +1 -1
- data/lib/bio-publisci/writers/base.rb +16 -0
- data/lib/bio-publisci/writers/json.rb +18 -0
- data/resources/maf_example.maf +10 -0
- data/resources/maf_rdf.ttl +1173 -0
- data/resources/primer.ttl +38 -0
- data/resources/queries/gene.rq +16 -0
- data/resources/queries/hugo_to_ensembl.rq +7 -0
- data/resources/queries/maf_column.rq +26 -0
- data/resources/queries/patient.rq +11 -0
- data/resources/queries/patient_list.rq +11 -0
- data/resources/queries/patients_with_mutation.rq +18 -0
- data/scripts/get_gene_lengths.rb +50 -0
- data/scripts/islet_mlratio.rb +1 -1
- data/scripts/scan_islet.rb +1 -1
- data/scripts/update_reference.rb +8 -3
- data/server/helpers.rb +215 -0
- data/server/public/src-min-noconflict/LICENSE +24 -0
- data/server/public/src-min-noconflict/ace.js +11 -0
- data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
- data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
- data/server/public/src-min-noconflict/ext-emmet.js +1 -0
- data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
- data/server/public/src-min-noconflict/ext-modelist.js +1 -0
- data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
- data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
- data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
- data/server/public/src-min-noconflict/ext-split.js +1 -0
- data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
- data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
- data/server/public/src-min-noconflict/ext-textarea.js +1 -0
- data/server/public/src-min-noconflict/ext-themelist.js +1 -0
- data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
- data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
- data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
- data/server/public/src-min-noconflict/mode-ruby.js +1 -0
- data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
- data/server/public/src-min-noconflict/theme-twilight.js +1 -0
- data/server/public/src-min-noconflict/worker-coffee.js +1 -0
- data/server/public/src-min-noconflict/worker-css.js +1 -0
- data/server/public/src-min-noconflict/worker-javascript.js +1 -0
- data/server/public/src-min-noconflict/worker-json.js +1 -0
- data/server/public/src-min-noconflict/worker-lua.js +1 -0
- data/server/public/src-min-noconflict/worker-php.js +1 -0
- data/server/public/src-min-noconflict/worker-xquery.js +1 -0
- data/server/routes.rb +123 -0
- data/server/views/dsl.haml +65 -0
- data/server/views/dump.haml +3 -0
- data/server/views/import.haml +35 -0
- data/server/views/new_repository.haml +25 -0
- data/server/views/query.haml +28 -0
- data/server/views/repository.haml +25 -0
- data/spec/ORM/data_cube_orm_spec.rb +1 -0
- data/spec/bnode_spec.rb +66 -0
- data/spec/data_cube_spec.rb +66 -63
- data/spec/dataset_for_spec.rb +36 -16
- data/spec/dsl_spec.rb +41 -0
- data/spec/generators/csv_spec.rb +3 -3
- data/spec/generators/dataframe_spec.rb +2 -2
- data/spec/generators/maf_spec.rb +40 -0
- data/spec/generators/r_cross_spec.rb +2 -2
- data/spec/generators/r_matrix_spec.rb +2 -2
- data/spec/length_lookup_spec.rb +0 -0
- data/spec/maf_query_spec.rb +343 -0
- data/spec/resource/example.Rhistory +1 -1
- data/spec/turtle/bacon +9 -9
- data/spec/turtle/reference +43 -43
- data/spec/turtle/weather +10 -10
- data/spec/writer_spec.rb +16 -2
- metadata +212 -61
@@ -3,8 +3,15 @@ module PubliSci
|
|
3
3
|
class Dataset
|
4
4
|
extend PubliSci::Interactive
|
5
5
|
|
6
|
-
def self.
|
6
|
+
# Lazily created table of registered readers, stored in a class-level
# instance variable. Keys and values are whatever was passed to
# register_reader.
def self.reader_registry
  return @reader_registry if @reader_registry

  @reader_registry = {}
end
|
7
9
|
|
10
|
+
# Associates +klass+ with +extension+ in the reader registry, replacing any
# earlier entry for the same extension. Returns +klass+.
def self.register_reader(extension, klass)
  registry = reader_registry
  registry[extension] = klass
end
|
13
|
+
|
14
|
+
def self.for(object, options={}, ask_on_ambiguous=true)
|
8
15
|
if options == false || options == true
|
9
16
|
ask_on_ambiguous = options
|
10
17
|
options = {}
|
@@ -20,13 +27,20 @@ module PubliSci
|
|
20
27
|
raise "Can't load file #{object}; file type inference not yet implemented"
|
21
28
|
end
|
22
29
|
|
23
|
-
|
24
|
-
|
25
|
-
r_object(object, options, ask_on_ambiguous)
|
26
|
-
when /.csv/i
|
27
|
-
PubliSci::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
|
30
|
+
if reader_registry.keys.include? extension
|
31
|
+
reader_registry[extension].new.automatic(object,options,ask_on_ambiguous)
|
28
32
|
else
|
29
|
-
|
33
|
+
case extension
|
34
|
+
when ".RData"
|
35
|
+
r_object(object, options, ask_on_ambiguous)
|
36
|
+
when /.csv/i
|
37
|
+
PubliSci::Readers::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
|
38
|
+
when /.arff/i
|
39
|
+
PubliSci::Readers::ARFF.new.generate_n3(object)
|
40
|
+
else
|
41
|
+
# false
|
42
|
+
raise "Unkown Extension #{extension}"
|
43
|
+
end
|
30
44
|
end
|
31
45
|
elsif object =~ %r{htt(p|ps)://.+}
|
32
46
|
self.for(download(object).path, options, ask_on_ambiguous) || RDF::Statement.new(RDF::URI(object), RDF::URI('http://semanticscience.org/resource/hasValue'), IO.read(download(object).path)).to_s
|
@@ -44,12 +58,6 @@ module PubliSci
|
|
44
58
|
end
|
45
59
|
end
|
46
60
|
|
47
|
-
# def for_remote
|
48
|
-
# addr = object
|
49
|
-
# tmp = download(object)
|
50
|
-
# self.for(tmp.path) || "#{addr} <http://semanticscience.org/resource/"
|
51
|
-
# end
|
52
|
-
|
53
61
|
def self.download(uri)
|
54
62
|
out = Tempfile.new(uri.split('/').last)
|
55
63
|
out.write open(uri).read
|
@@ -57,15 +65,6 @@ module PubliSci
|
|
57
65
|
out
|
58
66
|
end
|
59
67
|
|
60
|
-
# private
|
61
|
-
# def self.reader_exists?(object)
|
62
|
-
# if object.is_a? String
|
63
|
-
# if File.exist? object
|
64
|
-
|
65
|
-
# elsif
|
66
|
-
# end
|
67
|
-
# end
|
68
|
-
|
69
68
|
def self.r_object(object, options={}, ask_on_ambiguous=true)
|
70
69
|
if object.is_a? String
|
71
70
|
con = Rserve::Connection.new
|
@@ -80,7 +79,7 @@ module PubliSci
|
|
80
79
|
r_classes = con.eval("class(#{var})").to_ruby
|
81
80
|
|
82
81
|
if r_classes.include? "data.frame"
|
83
|
-
df = PubliSci::
|
82
|
+
df = PubliSci::Readers::Dataframe.new
|
84
83
|
unless options[:dimensions] || !ask_on_ambiguous
|
85
84
|
dims = con.eval("names(#{var})").to_ruby
|
86
85
|
puts "Which dimensions? #{dims}"
|
@@ -101,7 +100,7 @@ module PubliSci
|
|
101
100
|
df.generate_n3(con.eval(var),var,options)
|
102
101
|
|
103
102
|
elsif r_classes.include? "cross"
|
104
|
-
bc = PubliSci::
|
103
|
+
bc = PubliSci::Readers::RCross.new
|
105
104
|
|
106
105
|
unless options[:measures] || !ask_on_ambiguous
|
107
106
|
pheno_names = con.eval("names(#{var}$pheno)").to_ruby
|
@@ -122,7 +121,7 @@ module PubliSci
|
|
122
121
|
bc.generate_n3(con, var, base, options)
|
123
122
|
|
124
123
|
elsif r_classes.include? "matrix"
|
125
|
-
mat = PubliSci::
|
124
|
+
mat = PubliSci::Readers::RMatrix.new
|
126
125
|
|
127
126
|
unless options[:measures] || !ask_on_ambiguous
|
128
127
|
puts "Row label"
|
@@ -149,13 +148,13 @@ module PubliSci
|
|
149
148
|
|
150
149
|
mat.generate_n3(con, var, base, options)
|
151
150
|
else
|
152
|
-
raise "no PubliSci::
|
151
|
+
raise "no PubliSci::Readers found for #{r_classes}"
|
153
152
|
end
|
154
153
|
|
155
154
|
elsif object.is_a? Rserve::REXP
|
156
155
|
if object.attr.payload["class"].payload.first
|
157
156
|
|
158
|
-
df = PubliSci::
|
157
|
+
df = PubliSci::Readers::Dataframe.new
|
159
158
|
|
160
159
|
var = nil
|
161
160
|
|
@@ -1,27 +1,25 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
# rdfs:comment "#{fields[:description]}";
|
10
|
-
# dct:description "#{fields[:description]}";
|
11
|
-
# dct:issued "#{fields[:date]}"^^xsd:date.
|
1
|
+
begin
|
2
|
+
require 'spira'
|
3
|
+
module PubliSci
|
4
|
+
class Metadata
|
5
|
+
module Model
|
6
|
+
PROV ||= RDF::Vocabulary.new(RDF::URI.new('http://www.w3.org/ns/prov#'))
|
7
|
+
QB ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/linked-data/cube#'))
|
8
|
+
DCT ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/dc/terms/'))
|
12
9
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
10
|
+
|
11
|
+
class Meta < Spira::Base
|
12
|
+
type PROV.Entity
|
13
|
+
type QB.DataSet
|
14
|
+
property :label, predicate: RDF::RDFS.label
|
15
|
+
property :comment, predicate: RDF::RDFS.comment
|
16
|
+
property :description, predicate: DCT.description
|
17
|
+
property :creator, predicate: DCT.creator
|
18
|
+
property :issued, predicate: DCT.issued
|
19
|
+
end
|
21
20
|
end
|
22
21
|
end
|
23
22
|
end
|
24
|
-
|
25
|
-
#
|
26
|
-
|
27
|
-
# end
|
23
|
+
rescue LoadError
|
24
|
+
# puts "spira not installed, ORM unavailable"
|
25
|
+
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'rdf/4store'
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
begin
|
4
|
+
require 'spira'
|
5
5
|
|
6
6
|
module PubliSci
|
7
7
|
class Prov
|
@@ -104,6 +104,6 @@ module PubliSci
|
|
104
104
|
end
|
105
105
|
end
|
106
106
|
end
|
107
|
-
|
108
|
-
#
|
109
|
-
|
107
|
+
rescue LoadError
|
108
|
+
# puts "spira not installed, ORM unavailable"
|
109
|
+
end
|
data/lib/bio-publisci/output.rb
CHANGED
data/lib/bio-publisci/parser.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
module PubliSci
|
2
2
|
module Parser
|
3
3
|
|
4
|
-
def is_uri?(
|
5
|
-
RDF::Resource(
|
4
|
+
# Wraps +obj+ with RDF::Resource() and returns whatever that resource's
# +valid?+ reports.
def is_uri?(obj)
  resource = RDF::Resource(obj)
  resource.valid?
end
|
7
7
|
|
8
8
|
def sanitize(array)
|
@@ -14,7 +14,7 @@ module PubliSci
|
|
14
14
|
if is_uri? entry
|
15
15
|
processed << entry.gsub(/[\s]/,'_')
|
16
16
|
else
|
17
|
-
processed << entry.gsub(/[\s
|
17
|
+
processed << entry.gsub(/[\s]/,'_')
|
18
18
|
end
|
19
19
|
else
|
20
20
|
processed << entry
|
@@ -95,18 +95,24 @@ module PubliSci
|
|
95
95
|
end
|
96
96
|
end
|
97
97
|
|
98
|
-
def to_resource(obj, options)
|
98
|
+
def to_resource(obj, options={})
|
99
99
|
if obj.is_a? String
|
100
|
-
obj = "<#{obj}>" if is_uri? obj
|
101
100
|
|
102
|
-
|
103
|
-
|
104
|
-
|
101
|
+
if is_uri? obj
|
102
|
+
obj = RDF::Resource(obj).to_base unless obj[/\w+:\w/]
|
103
|
+
else
|
104
|
+
|
105
|
+
#TODO decide the right way to handle missing values, since RDF has no null
|
106
|
+
#probably throw an error here since a missing resource is a bigger problem
|
107
|
+
obj = "rdf:nil" if obj.empty?
|
108
|
+
obj= obj.to_s.gsub(' ','_')
|
109
|
+
end
|
105
110
|
|
111
|
+
obj
|
106
112
|
#TODO remove special characters (faster) as well (eg '?')
|
107
|
-
|
113
|
+
|
108
114
|
elsif obj == nil && options[:encode_nulls]
|
109
|
-
'
|
115
|
+
'rdf:nil'
|
110
116
|
elsif obj.is_a? Numeric
|
111
117
|
#resources cannot be referred to purely by integer (?)
|
112
118
|
"n"+obj.to_s
|
@@ -115,7 +121,7 @@ module PubliSci
|
|
115
121
|
end
|
116
122
|
end
|
117
123
|
|
118
|
-
def to_literal(obj, options)
|
124
|
+
def to_literal(obj, options={})
|
119
125
|
if obj.is_a? String
|
120
126
|
# Depressing that there's no more elegant way to check if a string is
|
121
127
|
# a number...
|
@@ -128,12 +134,124 @@ module PubliSci
|
|
128
134
|
end
|
129
135
|
elsif obj == nil && options[:encode_nulls]
|
130
136
|
#TODO decide the right way to handle missing values, since RDF has no null
|
131
|
-
'
|
137
|
+
'rdf:nil'
|
138
|
+
else
|
139
|
+
obj
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
# A value counts as "complex" (structured) exactly when it is an Array.
def is_complex?(obj)
  obj.kind_of?(Array)
end
|
146
|
+
|
147
|
+
# Builds the identifier for a (possibly nested) node.
#
# n   - index or identifier of the new node; must not be nil/false.
# str - identifier of the parent node, or "" for a top-level node.
#
# When +str+ contains "node" its final character is dropped (callers pass
# identifiers of the form "<node/...>", so this removes the closing ">") and
# "/n>" is appended; otherwise a fresh "<node/n>" is returned.
#
# Raises RuntimeError if +n+ is missing or +str+ is not a String.
def add_node(n, str = "")
  raise "need index or identifier to generate blank nodes" unless n
  raise "need base string or blank string for blank node" unless str.is_a? String

  if str["node"]
    "#{str[0..-2]}/#{n}>"
  else
    "<node/#{n}>"
  end
end
|
159
|
+
|
160
|
+
# Encodes +obj+ for output, choosing the representation by its shape:
#
# * a value accepted by is_uri?             -> to_resource(obj, options)
# * a String already wrapped in "<" ... ">" -> returned unchanged
# * an Array (structured value)             -> [node identifier, bnode_value(...)]
# * anything else                           -> to_literal(obj, options)
#
# node_index / node_str are only used for Arrays, where they are handed to
# add_node to name the node that holds the structured value.
def encode_value(obj,options={}, node_index=nil, node_str = "")
  if is_uri?(obj)
    to_resource(obj, options)
  elsif obj.is_a?(String) && obj.start_with?("<") && obj.end_with?(">")
    # This used to read `obj[-1] = ">"`: an assignment that overwrote the
    # caller's last character and was always truthy. Compare instead.
    obj
  elsif obj.is_a?(Array)
    node_str = add_node(node_index, node_str)
    ["#{node_str}", bnode_value(obj, node_index, node_str, options)]
  else
    to_literal(obj, options)
  end
end
|
136
172
|
|
173
|
+
# Serialises a structured (Array) value into predicate/object text.
#
# obj        - Array describing the structure (shapes handled below)
# node_index - index handed on to add_node / encode_value
# node_str   - identifier of the enclosing node
# options    - passed through to encode_value
#
# Shapes handled:
# * [String, value]  - one predicate/object pair. If the value is itself
#                      structured, the pair points at a new node and the
#                      encoded value is collected as a sub-node.
# * [Array, Array]   - two nested structures, encoded with indices 0 and 1.
# * an Array (size != 2) whose elements are all Arrays - each element is
#                      encoded in turn, one per line.
# Any other Array yields an empty string.
#
# Returns the text as a String, or - when sub-nodes were collected - a flat
# Array whose first element is that text.
#
# Raises RuntimeError when +obj+ is not an Array.
#
# TODO - Implement proper recursion
# TODO - check if object is "a" (rdf:type) => or convert rdf:type to "a"
def bnode_value(obj, node_index, node_str, options)
  raise "Invalid Structured value: #{obj}" unless obj.is_a?(Array)

  str = ""
  subnodes = []

  if obj.size == 2
    if obj[0].is_a?(String)
      if is_complex?(obj[1])
        str << "#{to_resource(obj[0])} #{add_node(node_index,node_str)} . \n"
        subnodes << encode_value(obj[1], options, node_index, node_str)
      else
        str << "#{to_resource(obj[0])} #{encode_value(obj[1], options, node_index, node_str)} "
      end
    elsif obj[0].is_a?(Array) && obj[1].is_a?(Array)
      # The result is not needed; the call is kept because add_node raises
      # when node_str is not a String.
      add_node(0, node_str)
      v1 = bnode_value(obj[0], 0, node_str, options)
      v2 = bnode_value(obj[1], 1, node_str, options)

      # Array results carry sub-nodes; they are collected instead of being
      # written inline.
      if v1.is_a? Array
        subnodes << v1
        v1 = nil
      end

      if v2.is_a? Array
        subnodes << v2
        v2 = nil
      end

      str << "#{v1} ;" if v1
      str << "\n#{v2} .\n" if v2
    end
  elsif obj.all? { |ent| ent.is_a? Array }
    obj.each do |ent|
      bn = bnode_value(ent, node_index, node_str, options)
      if bn.is_a? String
        str << bn + "\n"
      else
        str << bn[0] + "\n"
        subnodes << bn[1]
      end
    end
  end

  if subnodes.size > 0
    [str, subnodes.flatten].flatten
  else
    str
  end
end
|
229
|
+
|
230
|
+
# Re-indents +turtle_str+ line by line and returns the new text.
#
# Each line is prefixed with one " " per current indentation level; the
# level used for the *next* line is then derived from the line's last
# character:
#
# * "."  - level resets to 0
# * ";"  - level becomes 1 if it was 0, otherwise stays as it is
# * else - level resets to 0 for lines shorter than two characters,
#          otherwise increases by one
def turtle_indent(turtle_str)
  depth = 0

  indented = turtle_str.split("\n").map do |line|
    prefix = " " * depth

    depth =
      case line[-1]
      when "."
        0
      when ";"
        depth == 0 ? 1 : depth
      else
        line.size < 2 ? 0 : depth + 1
      end

    prefix + line
  end

  indented.join("\n")
end
|
254
|
+
|
137
255
|
def strip_uri(uri)
|
138
256
|
uri = uri.to_s.dup
|
139
257
|
uri[-1] = '' if uri[-1] == '>'
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'tempfile'
require 'fileutils'

module PubliSci

  # Helpers for posting Turtle/N3 to a remote service and collecting results
  # that the service makes available later through "?poll=" URLs.
  class SADI_request
    # POSTs +turtle+ to +service+ as text/rdf+n3 and returns an
    # RDF::Repository loaded from the response.
    def self.send_request(service, turtle)
      response = RestClient.post(service, turtle, content_type: 'text/rdf+n3', accept: 'text/rdf+n3')
      RDF::Repository.new << RDF::Turtle::Reader.new(response)
    end

    # Sends +turtle+ to +service+, then polls every rdfs:isDefinedBy target
    # whose URL contains "?poll=" until each one has produced a result.
    #
    # Returns an Array with one response body per distinct poll URL.
    def self.fetch_async(service, turtle)
      gr = send_request(service, turtle)

      rdfs = RDF::Vocabulary.new("http://www.w3.org/2000/01/rdf-schema#")
      polls = RDF::Query.execute(gr) do
        pattern [:obj, rdfs.isDefinedBy, :def]
      end

      # poll URL => earliest time at which it should be polled next
      pending = {}
      polls.map(&:def).select { |res| res.to_s["?poll="] }.each do |poll_url|
        pending[poll_url.to_s] = Time.now
      end

      results = []
      until pending.empty?
        poll_url, ready_at = pending.min_by { |_url, time| time }
        t = Time.now

        if ready_at > t
          puts "no poll urls ready, sleeping #{ready_at - t}"
          sleep ready_at - t
        end

        result = poll(poll_url)
        if result.is_a? Integer
          puts "#{poll_url} Response not ready, waiting #{result}"
          pending[poll_url] = Time.now + result
        else
          results << result
          # A finished URL must leave the schedule. Previously it stayed the
          # "earliest" entry and was polled again while later URLs could be
          # skipped entirely.
          pending.delete(poll_url)
        end
      end

      results
    end

    # Performs one GET on +url+.
    #
    # Returns an Integer number of seconds when the server answers with a
    # 301/302/307 carrying a Retry-After header; otherwise returns the body
    # of the (possibly redirected) response.
    def self.poll(url)
      resp = RestClient.get(url, accept: 'text/rdf+n3') { |response, request, result, &block|
        if [301, 302, 307].include? response.code
          wait = response.headers[:retry_after]
          if wait
            # `return` inside the block leaves +poll+ itself.
            return wait.to_i
          else
            response.follow_redirection(request, result, &block)
          end
        else
          response.return!(request, result, &block)
        end
      }
      resp.body
    end

    # Polls +poll_url+ once. Returns the Integer wait time when the result is
    # not ready yet, otherwise an RDF::Repository loaded from the response.
    def self.try_fetch(poll_url)
      puts "polling #{poll_url}"
      result = poll(poll_url)
      return result if result.is_a? Integer

      RDF::Repository.new << RDF::Turtle::Reader.new(result)
    end
  end

  # Line-oriented rewriting of a file.
  class PostProcessor

    # Copies +infile+ to +outfile+, rewriting matches of +pattern+.
    #
    # For every match found on a line, the first capture group is yielded to
    # the caller's block and the block's return value replaces the first
    # remaining occurrence of +pattern+ on that line. +pattern+ is expected
    # to contain a capture group.
    #
    # Returns +outfile+.
    def self.process(infile, outfile, pattern)
      tmp = Tempfile.new('annot_temp')
      begin
        open(infile) do |input|
          input.each_line do |line|
            line.scan(pattern).each do |loc|
              line.sub!(pattern, yield(loc.first))
            end
            tmp.write(line)
          end
        end

        # Buffered writes must reach the disk before the file is copied by
        # path, otherwise the copy can be empty or truncated.
        tmp.flush
        FileUtils.copy(tmp.path, outfile)
      ensure
        tmp.close!
      end

      outfile
    end
  end
end
|