publisci 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (220) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +13 -0
  5. data/Gemfile +36 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +51 -0
  8. data/README.rdoc +48 -0
  9. data/Rakefile +68 -0
  10. data/bin/bio-publisci +106 -0
  11. data/bin/bio-publisci-server +50 -0
  12. data/examples/bio-band_integration.rb +9 -0
  13. data/examples/no_magic.prov +58 -0
  14. data/examples/no_magic.rb +58 -0
  15. data/examples/orm.prov +48 -0
  16. data/examples/primer-full.prov +120 -0
  17. data/examples/primer.prov +66 -0
  18. data/examples/prov_dsl.prov +85 -0
  19. data/examples/safe_gen.rb +7 -0
  20. data/examples/visualization/primer.prov +66 -0
  21. data/examples/visualization/prov_viz.rb +140 -0
  22. data/examples/visualization/viz.rb +35 -0
  23. data/features/create_generator.feature +21 -0
  24. data/features/integration.feature +12 -0
  25. data/features/integration_steps.rb +10 -0
  26. data/features/metadata.feature +37 -0
  27. data/features/metadata_steps.rb +40 -0
  28. data/features/orm.feature +60 -0
  29. data/features/orm_steps.rb +74 -0
  30. data/features/prov_dsl.feature +14 -0
  31. data/features/prov_dsl_steps.rb +11 -0
  32. data/features/reader.feature +25 -0
  33. data/features/reader_steps.rb +61 -0
  34. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  35. data/features/store.feature +27 -0
  36. data/features/store_steps.rb +42 -0
  37. data/features/support/env.rb +13 -0
  38. data/features/writer.feature +14 -0
  39. data/features/writer_steps.rb +24 -0
  40. data/lib/bio-publisci.rb +64 -0
  41. data/lib/bio-publisci/analyzer.rb +57 -0
  42. data/lib/bio-publisci/datacube_model.rb +111 -0
  43. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
  44. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  45. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  46. data/lib/bio-publisci/dataset/data_cube.rb +418 -0
  47. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  48. data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
  49. data/lib/bio-publisci/dataset/interactive.rb +72 -0
  50. data/lib/bio-publisci/dsl/config.rb +34 -0
  51. data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
  52. data/lib/bio-publisci/dsl/dsl.rb +72 -0
  53. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  54. data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
  55. data/lib/bio-publisci/metadata/generator.rb +323 -0
  56. data/lib/bio-publisci/metadata/metadata.rb +5 -0
  57. data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
  58. data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
  59. data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
  60. data/lib/bio-publisci/metadata/prov/association.rb +107 -0
  61. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  62. data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
  63. data/lib/bio-publisci/metadata/prov/element.rb +120 -0
  64. data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
  65. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
  66. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  67. data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
  68. data/lib/bio-publisci/metadata/prov/role.rb +40 -0
  69. data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
  70. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  71. data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
  72. data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
  73. data/lib/bio-publisci/mixins/registry.rb +27 -0
  74. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  75. data/lib/bio-publisci/output.rb +27 -0
  76. data/lib/bio-publisci/parser.rb +266 -0
  77. data/lib/bio-publisci/post_processor.rb +95 -0
  78. data/lib/bio-publisci/query/query_helper.rb +123 -0
  79. data/lib/bio-publisci/r_client.rb +54 -0
  80. data/lib/bio-publisci/readers/arff.rb +49 -0
  81. data/lib/bio-publisci/readers/base.rb +57 -0
  82. data/lib/bio-publisci/readers/csv.rb +88 -0
  83. data/lib/bio-publisci/readers/dataframe.rb +67 -0
  84. data/lib/bio-publisci/readers/maf.rb +199 -0
  85. data/lib/bio-publisci/readers/r_cross.rb +112 -0
  86. data/lib/bio-publisci/readers/r_matrix.rb +176 -0
  87. data/lib/bio-publisci/store.rb +56 -0
  88. data/lib/bio-publisci/writers/arff.rb +91 -0
  89. data/lib/bio-publisci/writers/base.rb +93 -0
  90. data/lib/bio-publisci/writers/csv.rb +31 -0
  91. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  92. data/lib/bio-publisci/writers/json.rb +18 -0
  93. data/lib/r2rdf.rb +226 -0
  94. data/lib/template_bak.rb +12 -0
  95. data/lib/template_bak/publisci.rb +3 -0
  96. data/lib/vocabs/cc.rb +18 -0
  97. data/lib/vocabs/cert.rb +13 -0
  98. data/lib/vocabs/dc.rb +63 -0
  99. data/lib/vocabs/dc11.rb +23 -0
  100. data/lib/vocabs/doap.rb +45 -0
  101. data/lib/vocabs/exif.rb +168 -0
  102. data/lib/vocabs/foaf.rb +69 -0
  103. data/lib/vocabs/geo.rb +13 -0
  104. data/lib/vocabs/http.rb +26 -0
  105. data/lib/vocabs/ma.rb +78 -0
  106. data/lib/vocabs/owl.rb +59 -0
  107. data/lib/vocabs/rdfs.rb +17 -0
  108. data/lib/vocabs/rsa.rb +12 -0
  109. data/lib/vocabs/rss.rb +14 -0
  110. data/lib/vocabs/sioc.rb +93 -0
  111. data/lib/vocabs/skos.rb +36 -0
  112. data/lib/vocabs/wot.rb +21 -0
  113. data/lib/vocabs/xhtml.rb +9 -0
  114. data/lib/vocabs/xsd.rb +58 -0
  115. data/resources/maf_example.maf +10 -0
  116. data/resources/maf_rdf.ttl +1173 -0
  117. data/resources/primer.ttl +38 -0
  118. data/resources/queries/code_resources.rq +10 -0
  119. data/resources/queries/codes.rq +18 -0
  120. data/resources/queries/dataset.rq +7 -0
  121. data/resources/queries/dimension_ranges.rq +8 -0
  122. data/resources/queries/dimensions.rq +12 -0
  123. data/resources/queries/gene.rq +16 -0
  124. data/resources/queries/hugo_to_ensembl.rq +7 -0
  125. data/resources/queries/maf_column.rq +26 -0
  126. data/resources/queries/measures.rq +12 -0
  127. data/resources/queries/observation_labels.rq +8 -0
  128. data/resources/queries/observations.rq +13 -0
  129. data/resources/queries/patient.rq +11 -0
  130. data/resources/queries/patient_list.rq +11 -0
  131. data/resources/queries/patients_with_mutation.rq +18 -0
  132. data/resources/queries/properties.rq +8 -0
  133. data/resources/queries/test.rq +3 -0
  134. data/resources/weather.numeric.arff +28 -0
  135. data/scripts/get_gene_lengths.rb +50 -0
  136. data/scripts/islet_mlratio.rb +6 -0
  137. data/scripts/scan_islet.rb +6 -0
  138. data/scripts/update_reference.rb +25 -0
  139. data/server/helpers.rb +215 -0
  140. data/server/public/src-min-noconflict/LICENSE +24 -0
  141. data/server/public/src-min-noconflict/ace.js +11 -0
  142. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  143. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  144. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  145. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  146. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  147. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  148. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  149. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  150. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  151. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  152. data/server/public/src-min-noconflict/ext-split.js +1 -0
  153. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  154. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  155. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  156. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  157. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  158. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  159. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  160. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  161. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  162. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  163. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  164. data/server/public/src-min-noconflict/worker-css.js +1 -0
  165. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  166. data/server/public/src-min-noconflict/worker-json.js +1 -0
  167. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  168. data/server/public/src-min-noconflict/worker-php.js +1 -0
  169. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  170. data/server/routes.rb +123 -0
  171. data/server/views/dsl.haml +65 -0
  172. data/server/views/dump.haml +3 -0
  173. data/server/views/import.haml +35 -0
  174. data/server/views/new_repository.haml +25 -0
  175. data/server/views/query.haml +28 -0
  176. data/server/views/repository.haml +25 -0
  177. data/spec/ORM/data_cube_orm_spec.rb +33 -0
  178. data/spec/ORM/prov_model_spec.rb +72 -0
  179. data/spec/analyzer_spec.rb +36 -0
  180. data/spec/bnode_spec.rb +66 -0
  181. data/spec/csv/bacon.csv +4 -0
  182. data/spec/csv/moar_bacon.csv +11 -0
  183. data/spec/data_cube_spec.rb +169 -0
  184. data/spec/dataset_for_spec.rb +77 -0
  185. data/spec/dsl_spec.rb +134 -0
  186. data/spec/generators/csv_spec.rb +44 -0
  187. data/spec/generators/dataframe_spec.rb +44 -0
  188. data/spec/generators/maf_spec.rb +40 -0
  189. data/spec/generators/r_cross_spec.rb +51 -0
  190. data/spec/generators/r_matrix_spec.rb +44 -0
  191. data/spec/length_lookup_spec.rb +0 -0
  192. data/spec/maf_query_spec.rb +343 -0
  193. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  194. data/spec/prov/activity_spec.rb +74 -0
  195. data/spec/prov/agent_spec.rb +54 -0
  196. data/spec/prov/association_spec.rb +55 -0
  197. data/spec/prov/config_spec.rb +28 -0
  198. data/spec/prov/derivation_spec.rb +30 -0
  199. data/spec/prov/entity_spec.rb +52 -0
  200. data/spec/prov/role_spec.rb +94 -0
  201. data/spec/prov/usage_spec.rb +98 -0
  202. data/spec/queries/integrity/1.rq +21 -0
  203. data/spec/queries/integrity/11.rq +29 -0
  204. data/spec/queries/integrity/12.rq +37 -0
  205. data/spec/queries/integrity/14.rq +25 -0
  206. data/spec/queries/integrity/19_1.rq +21 -0
  207. data/spec/queries/integrity/19_2.rq +15 -0
  208. data/spec/queries/integrity/2.rq +22 -0
  209. data/spec/queries/integrity/3.rq +19 -0
  210. data/spec/queries/integrity/4.rq +13 -0
  211. data/spec/queries/integrity/5.rq +14 -0
  212. data/spec/r_builder_spec.rb +33 -0
  213. data/spec/resource/.RData +0 -0
  214. data/spec/resource/example.Rhistory +3 -0
  215. data/spec/spec_helper.rb +17 -0
  216. data/spec/turtle/bacon +147 -0
  217. data/spec/turtle/reference +2064 -0
  218. data/spec/turtle/weather +275 -0
  219. data/spec/writer_spec.rb +75 -0
  220. metadata +589 -0
@@ -0,0 +1,34 @@
1
module PubliSci
  class Prov
    # Mixin for collections whose elements may be Symbols that stand in for
    # objects stored in Prov.registry. The including object must respond to
    # +map+ and must override #method to name the registry section to use.
    module Dereferencable
      # Returns the result of +map+ with every Symbol element replaced by
      # the object stored at Prov.registry[method.to_sym][symbol]; all other
      # elements are passed through unchanged.
      #
      # Raises RuntimeError ("Unknown<Kind>: <symbol>") when the symbol
      # cannot be resolved. This includes the cases where nothing has been
      # registered for the kind yet, or where the kind only holds a list of
      # unnamed objects, which previously surfaced as NoMethodError/TypeError.
      def dereference
        map do |item|
          next item unless item.is_a?(Symbol)

          # Resolved lazily so that collections without symbols never need
          # #method to be overridden.
          kind = method
          table = Prov.registry[kind.to_sym]
          found = table.is_a?(Hash) ? table[item] : nil
          raise "Unknown#{kind.capitalize}: #{item}" unless found

          found
        end
      end

      # Name of the registry section used by #dereference.
      # Includers must override this; the default always raises.
      def method
        raise "must be overridden"
      end

      # Element at +index+ after dereferencing. Uses +fetch+, so an
      # out-of-range index raises IndexError.
      def [](index)
        dereference.fetch(index)
      end

      # Maps the block over the dereferenced elements.
      def map_(&blk)
        dereference.map(&blk)
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module PubliSci
  # Mixin that stores objects in a two-level registry grouped by kind.
  module Registry
    # Stores +object+ in the registry.
    #
    # The kind key is symbol_for(object) when that is truthy; otherwise it
    # is the last segment of the object's class name, downcased, as a
    # Symbol. With a +name+ the object is stored in a Hash under
    # name.to_sym; without one it is appended to an Array for that kind.
    #
    # Returns the stored object (named) or the kind's Array (unnamed).
    def register(name, object)
      name = name.to_sym if name
      # symbol_for is consulted once; the class-name fallback is only
      # computed when it returns a falsy value.
      kind = symbol_for(object) || object.class.to_s.split('::').last.downcase.to_sym

      if name
        (registry[kind] ||= {})[name] = object
      else
        (registry[kind] ||= []) << object
      end
    end

    # The lazily created registry Hash, keyed by kind.
    def registry
      @registry ||= {}
    end

    # Hook for includers: return a Symbol to choose the kind for +object+.
    # The default returns false, which selects the class-name fallback.
    def symbol_for(object)
      false
    end
  end
end
@@ -0,0 +1,8 @@
1
module PubliSci
  module Vocabulary
    # Builds an RDF::Vocabulary for +url+.
    # Raises RuntimeError when RDF::Resource(url) does not report valid?.
    def vocabulary(url)
      if RDF::Resource(url).valid?
        RDF::Vocabulary.new(url)
      else
        raise "InvalidVocabulary: #{url} is not a valid URI"
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module PubliSci
  module Readers
    module Output
      # Emits +string+ to one or more destinations.
      #
      # options[:type] selects the destinations; it may be a single value
      # or an Array of :print, :file and :string, and defaults to :string.
      #   :print  - writes the string with +puts+
      #   :file   - writes the string to "#{options[:file_base]}#{options[:file_name]}",
      #             appending when +append+ is true and overwriting otherwise
      #   :string - makes the method return the string
      #
      # Returns +string+ when :string is among the types, otherwise nil.
      # Raises RuntimeError when :file is requested without :file_name.
      # The caller's options hash is left untouched.
      def output(string, options={}, append=false)
        types = Array(options[:type] || :string)

        puts string if types.include?(:print)

        if types.include?(:file)
          name = options[:file_name]
          raise "no file specified output" unless name

          mode = append ? 'a' : 'w'
          # Previously wrote an undefined local (`str`), which raised NameError.
          File.open("#{options[:file_base]}#{name}", mode) { |f| f.write(string) }
        end

        string if types.include?(:string)
      end
    end
  end
end
@@ -0,0 +1,266 @@
1
+ module PubliSci
2
+ module Parser
3
+
4
# Returns whatever RDF::Resource(obj) reports for valid?.
def is_uri?(obj)
  resource = RDF::Resource(obj)
  resource.valid?
end
7
+
8
# Returns a new Array in which every whitespace character of each String
# entry is replaced by an underscore. Non-String entries are passed
# through unchanged. +array+ is coerced with Array(), so nil yields []
# and a single value yields a one-element Array.
def sanitize(array)
  # The former URI / non-URI branches performed the same substitution,
  # so no URI check is needed here.
  Array(array).map do |entry|
    entry.is_a?(String) ? entry.gsub(/\s/, '_') : entry
  end
end
25
+
26
# Rewrites, in place, every String key of +h+ so that spaces become
# underscores, and returns +h+. Each String key is deleted and
# re-inserted (even when it contains no space), so String keys end up
# after the non-String keys in iteration order.
def sanitize_hash(h)
  # Iterate over a snapshot of the keys because the hash is modified.
  h.keys.each do |key|
    next unless key.is_a?(String)

    h[key.gsub(' ', '_')] = h.delete(key)
  end
  h
end
40
+
41
# Loads Turtle text into +repo+ by writing +string+ to a temporary file
# and calling repo.load(path, :format => :ttl). Returns +repo+.
#
# The temporary file is closed and removed even when writing or loading
# raises; the exception itself is propagated to the caller.
def load_string(string, repo=RDF::Repository.new)
  file = Tempfile.new('repo')
  begin
    file.write(string)
    # Close first so the content is flushed before it is read by path.
    file.close
    repo.load(file.path, :format => :ttl)
  ensure
    file.close!
  end
  repo
end
49
+
50
# Converts query results into an Array of Arrays. Each solution is
# expanded with to_a and the last element of every entry is converted
# with +method+ when it responds to it, or with to_s otherwise.
def get_ary(query_results, method='to_s')
  query_results.map do |solution|
    solution.to_a.map do |pair|
      value = pair.last
      value.respond_to?(method) ? value.send(method) : value.to_s
    end
  end
end
61
+
62
# Converts query results into an Array of Hashes, one per solution.
# Each element of a solution is read as [key, value]; the value is
# converted with +method+ when one is given and the value responds to
# it, and stored unchanged otherwise.
def get_hashes(query_results, method=nil)
  query_results.map do |solution|
    row = {}
    solution.each do |pair|
      value = pair[1]
      row[pair[0]] = (method && value.respond_to?(method)) ? value.send(method) : value
    end
    row
  end
end
77
+
78
# Groups query solutions into { observation => { property => value } },
# with every part converted by to_s. Each solution is read through
# sol[:observation], sol[:property] and sol[:value].
#
# When +shorten_uris+ is truthy, a second Hash is returned in which all
# keys and values have been passed through strip_uri. The +method+
# argument is accepted but not used.
def observation_hash(query_results, shorten_uris=false, method='to_s')
  grouped = {}
  query_results.each do |sol|
    subject = sol[:observation].to_s
    grouped[subject] ||= {}
    grouped[subject][sol[:property].to_s] = sol[:value].to_s
  end
  return grouped unless shorten_uris

  shortened = {}
  grouped.each do |subject, properties|
    short_subject = strip_uri(subject)
    shortened[short_subject] ||= {}
    properties.each do |property, value|
      shortened[short_subject][strip_uri(property)] = strip_uri(value)
    end
  end
  shortened
end
97
+
98
# Converts +obj+ into a value usable as a resource.
#
# Strings: when is_uri? accepts the string it is returned as-is if it
# contains a "word:word" sequence, and otherwise replaced by
# RDF::Resource(obj).to_base. Strings rejected by is_uri? have spaces
# replaced with underscores, with the empty string becoming "rdf:nil".
# nil becomes 'rdf:nil' only when options[:encode_nulls] is set.
# Numerics are prefixed with "n". Anything else is returned unchanged.
def to_resource(obj, options={})
  unless obj.is_a?(String)
    return 'rdf:nil' if obj == nil && options[:encode_nulls]
    # resources cannot be referred to purely by integer (?)
    return "n"+obj.to_s if obj.is_a?(Numeric)

    return obj
  end

  if is_uri?(obj)
    return obj if obj[/\w+:\w/]

    return RDF::Resource(obj).to_base
  end

  # TODO decide the right way to handle missing values, since RDF has no null
  # TODO remove special characters (faster) as well (eg '?')
  text = obj.empty? ? "rdf:nil" : obj
  text.to_s.gsub(' ', '_')
end
123
+
124
# Converts +obj+ into a literal value.
#
# A String that Integer() accepts becomes that Integer; failing that,
# one that Float() accepts becomes that Float; any other String is
# wrapped in double quotes. nil becomes 'rdf:nil' only when
# options[:encode_nulls] is set. Everything else is returned unchanged.
def to_literal(obj, options={})
  unless obj.is_a?(String)
    # TODO decide the right way to handle missing values, since RDF has no null
    return 'rdf:nil' if obj == nil && options[:encode_nulls]

    return obj
  end

  number = begin
    Integer(obj)
  rescue StandardError
    begin
      Float(obj)
    rescue StandardError
      nil
    end
  end

  number || '"'+obj+'"'
end
142
+
143
# True when +obj+ is an Array.
def is_complex?(obj)
  obj.kind_of?(Array)
end
146
+
147
# Builds a node identifier of the form "<node/...>".
#
# When +str+ contains "node", its last character is dropped and "/n>"
# is appended, nesting +n+ under the existing identifier. Otherwise a
# fresh "<node/n>" is returned.
#
# Raises RuntimeError when +n+ is nil/false or +str+ is not a String.
def add_node(n, str="")
  raise "need index or identifier to generate blank nodes" unless n
  raise "need base string or blank string for blank node" unless str.is_a? String

  return "<node/#{n}>" unless str["node"]

  "#{str[0..-2]}/#{n}>"
end
159
+
160
# Encodes +obj+ for output, choosing the representation by its shape:
#
# * values accepted by is_uri? go through to_resource
# * Strings already wrapped in "<...>" are returned unchanged
# * Arrays (structured values) yield a two-element Array: the node
#   identifier from add_node(node_index, node_str), and the result of
#   bnode_value for the Array under that identifier
# * everything else goes through to_literal
#
# The "<...>" test compares the last character; it previously assigned
# to it, which rewrote the caller's string, matched any string starting
# with "<", and raised on frozen strings.
def encode_value(obj, options={}, node_index=nil, node_str="")
  if is_uri?(obj)
    to_resource(obj, options)
  elsif obj.is_a?(String) && obj[0] == "<" && obj[-1] == ">"
    obj
  elsif obj.is_a?(Array)
    node_str = add_node(node_index, node_str)
    [node_str, bnode_value(obj, node_index, node_str, options)]
  else
    to_literal(obj, options)
  end
end
172
+
173
# Serialises a structured (Array) value into statement text.
#
# Shapes handled:
# * [String, value]  - "resource value " for a simple value; for an
#   Array value the statement points at add_node(node_index, node_str)
#   and the encoded value is collected as a nested node
# * [Array, Array]   - both halves are serialised recursively and joined
#   with " ;" / " ." unless a half itself produced nested nodes
# * any other Array made up only of Arrays - each entry is serialised
#   on its own line
# Other Arrays produce an empty string.
#
# Returns the text as a String, or, when nested nodes were collected,
# a flat Array with the text first followed by the nested parts.
# Raises RuntimeError when +obj+ is not an Array.
#
# TODO - Implement proper recursion
# TODO - check if object is "a" (rdf:type) => or convert rdf:type to "a"
def bnode_value(obj, node_index, node_str, options)
  raise "Invalid Structured value: #{obj}" unless obj.is_a?(Array)

  text = ""
  nested = []

  if obj.size == 2
    head = obj[0]
    tail = obj[1]

    if head.is_a?(String)
      if is_complex?(tail)
        text << "#{to_resource(head)} #{add_node(node_index,node_str)} . \n"
        nested << encode_value(tail, options, node_index, node_str)
      else
        text << "#{to_resource(head)} #{encode_value(tail, options, node_index, node_str)} "
      end
    elsif head.is_a?(Array) && tail.is_a?(Array)
      # Result is not used; the call is kept because add_node raises when
      # node_str is not a String.
      add_node(0, node_str)
      first = bnode_value(head, 0, node_str, options)
      second = bnode_value(tail, 1, node_str, options)

      if first.is_a?(Array)
        nested << first
      elsif first
        text << "#{first} ;"
      end

      if second.is_a?(Array)
        nested << second
      elsif second
        text << "\n#{second} .\n"
      end
    end
  elsif obj.all? { |entry| entry.is_a?(Array) }
    obj.each do |entry|
      part = bnode_value(entry, node_index, node_str, options)
      if part.is_a?(String)
        text << part + "\n"
      else
        text << part[0] + "\n"
        nested << part[1]
      end
    end
  end

  return text if nested.empty?

  [text, nested.flatten].flatten
end
229
+
230
# Re-indents Turtle text line by line.
#
# Every line is prefixed with one space per current indent level; the
# level for the following line is then derived from the line's last
# character: "." resets it to 0, ";" raises it from 0 to 1 (and leaves
# any other level alone), a line shorter than two characters resets it
# to 0, and any other line increases it by one.
def turtle_indent(turtle_str)
  depth = 0
  lines = turtle_str.split("\n").map do |line|
    prefix = " " * depth

    if line.end_with?(".")
      depth = 0
    elsif line.end_with?(";")
      depth = 1 if depth == 0
    elsif line.size < 2
      depth = 0
    else
      depth += 1
    end

    prefix + line
  end
  lines.join("\n")
end
254
+
255
# Returns the local part of a URI: the text after the last "/" and,
# within that, after the last "#". A single trailing ">" is removed
# first, so bracketed forms such as "<http://host/path#name>" work.
#
# Always returns a String. Inputs with no usable segment (nil, "", ">",
# "#") yield "" instead of raising NoMethodError or returning nil.
def strip_uri(uri)
  text = uri.to_s.chomp('>')
  segment = text.split('/').last.to_s
  segment.split('#').last.to_s
end
260
+
261
# Returns the text after the last ":" of string.to_s
# (nil when the string is empty).
def strip_prefixes(string)
  segments = string.to_s.split(':')
  segments.last
end
264
+
265
+ end
266
+ end
@@ -0,0 +1,95 @@
1
+ module PubliSci
2
+
3
# Client helpers for posting Turtle to a service and polling the
# URLs it returns for results.
class SADI_request
  # POSTs +turtle+ to +service+ as text/rdf+n3 and returns a new
  # RDF::Repository loaded from the response.
  def self.send_request(service, turtle)
    response = RestClient.post(service, turtle, content_type: 'text/rdf+n3', accept: 'text/rdf+n3')
    RDF::Repository.new << RDF::Turtle::Reader.new(response)
  end

  # Sends the request, collects every rdfs:isDefinedBy object whose
  # text contains "?poll=", and polls those URLs until each one has
  # produced a result.
  #
  # Returns an Array with one result (as returned by poll) per distinct
  # poll URL, in completion order. A URL is dropped from the pending set
  # as soon as it yields a result, so finished URLs are not polled again.
  def self.fetch_async(service, turtle)
    gr = send_request(service, turtle)

    rdfs = RDF::Vocabulary.new("http://www.w3.org/2000/01/rdf-schema#")
    polls = RDF::Query.execute(gr) do
      pattern [:obj, rdfs.isDefinedBy, :def]
    end

    # poll URL => earliest time at which it should be polled next
    pending = {}
    polls.map(&:def).select { |res| res.to_s["?poll="] }.each do |poll_url|
      pending[poll_url.to_s] = Time.now
    end

    results = []
    until pending.empty?
      poll_url, due = pending.min_by { |_url, at| at }
      now = Time.now

      if due > now
        puts "no poll urls ready, sleeping #{due - now}"
        sleep due - now
      end

      result = poll(poll_url)
      # An Integer is the number of seconds the service asked us to wait.
      if result.is_a? Integer
        puts "#{poll_url} Response not ready, waiting #{result}"
        pending[poll_url] = Time.now + result
      else
        results << result
        pending.delete(poll_url)
      end
    end

    results
  end

  # GETs +url+ once. On a 301/302/307 response carrying a Retry-After
  # header, returns that header converted with to_i; other redirects are
  # followed. Otherwise returns the response body.
  def self.poll(url)
    resp = RestClient.get(url, accept: 'text/rdf+n3') { |response, request, result, &block|
      if [301, 302, 307].include? response.code
        wait = response.headers[:retry_after]
        if wait
          # Returns from poll itself, not just from this block.
          return wait.to_i
        else
          response.follow_redirection(request, result, &block)
        end
      else
        response.return!(request, result, &block)
      end
    }
    resp.body
  end

  # Polls +poll_url+ once. Returns the Integer wait time when the
  # response is not ready, otherwise a new RDF::Repository loaded from
  # the response body.
  def self.try_fetch(poll_url)
    puts "polling #{poll_url}"
    result = poll(poll_url)
    return result if result.is_a? Integer

    RDF::Repository.new << RDF::Turtle::Reader.new(result)
  end
end
72
+
73
class PostProcessor
  # Copies +infile+ to +outfile+, replacing every match of +pattern+
  # with the value returned by the given block. The block receives the
  # first capture group of each match.
  #
  # The result is assembled in a temporary file and copied into place
  # afterwards, so +infile+ and +outfile+ may be the same path. The
  # temporary file is closed (flushing its content) before the copy and
  # removed afterwards, and the input handle is closed when reading ends.
  #
  # Each match is replaced exactly once in a single pass, so replacement
  # text that itself matches +pattern+ is not substituted again, and the
  # block's return value is inserted literally.
  #
  # Returns +outfile+.
  def self.process(infile, outfile, pattern)
    tmp = Tempfile.new('annot_temp')
    begin
      File.open(infile) do |input|
        input.each_line do |line|
          tmp.write(line.gsub(pattern) { yield(Regexp.last_match(1)) })
        end
      end

      # Without this the copy below could read a partially written file.
      tmp.close
      FileUtils.copy(tmp.path, outfile)
    ensure
      tmp.close!
    end

    outfile
  end
end
95
+ end