activerubic 0.8.0 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/active_rubic.rb CHANGED
@@ -3,8 +3,7 @@
3
3
  #
4
4
  # The engines are also meant to be drop-in replaceable through the abstraction method find(). See ActiveRubic::Base for the API reference.
5
5
  # - some of it is still undocumented!!
6
-
7
-
6
+ #
8
7
  # The supported backends currently include:
9
8
  #
10
9
  # - ActiveRecord
@@ -21,7 +20,7 @@
21
20
  # http://openlab.savonia-amk.fi/wiki/images/1/12/Rubics_cube.png
22
21
  #
23
22
  # Authors:: Mikael Lammentausta, Lauri Miettinen
24
- # Copyright:: Copyright (c) 2007 Savonia University of Applied Sciences
23
+ # Copyright:: Copyright (c) 2007-2008 Savonia University of Applied Sciences
25
24
  # License:: MIT
26
25
  #
27
26
  # = Examples
@@ -239,25 +238,11 @@
239
238
  # => ["Kuopion museo"]
240
239
  #
241
240
  ############################################################################
242
-
243
- RUBIC_VERSION = '0.8.0'
244
-
245
241
  module ActiveRubic
246
- VERSION = RUBIC_VERSION
247
- class ActiveRubicError < StandardError #:nodoc:
248
- end
249
- class AdapterNotSpecifiedError < ActiveRubicError # :nodoc:
250
- end
251
- class AdapterNotFoundError < ActiveRubicError # :nodoc:
252
- end
253
- class ConnectionNotEstablishedError < ActiveRubicError #:nodoc:
254
- end
255
- class ConnectionFailedError < ActiveRubicError #:nodoc:
256
- end
257
- class RubicNotFoundError < ActiveRubicError #:nodoc:
258
- end
242
+ VERSION = '0.8.1'
259
243
  end
260
244
 
245
+ RUBIC_VERSION = ActiveRubic::VERSION
261
246
 
262
247
  ### the data sources that will be queried
263
248
  RUBIC_DATASTORES = [ :jena, :joseki, :mysql, :lucene, :geolucene ] unless defined? RUBIC_DATASTORES
@@ -265,6 +250,20 @@ RUBIC_DATASTORES = [ :jena, :joseki, :mysql, :lucene, :geolucene ] unless define
265
250
  # unused
266
251
  RUBIC_CONNECTION_ADAPTERS = [ :rubinstein ] unless defined?(RUBIC_CONNECTION_ADAPTERS)
267
252
 
253
# Add the directory of this file and its active_rubic subdirectories to the
# Ruby load path, so the bundled libraries can be required by bare name.
# When this file is reached through a symlink, the link target is used.
file = File.symlink?(__FILE__) ? File.readlink(__FILE__) : __FILE__
this_dir = File.dirname(File.expand_path(file))
[
  '',
  '/active_rubic/',
  '/active_rubic/objectmanager',
  '/active_rubic/objectmanager/rdf',
  '/active_rubic/objectmanager/owl',
  '/active_rubic/active_rubinstein'
].each do |subdir|
  path = this_dir + subdir
  # every directory is listed once only; entries already present are not appended again
  $: << path unless $:.include?( path )
end
RUBIC_HOME = this_dir #unless defined? RUBIC_HOME
264
+
265
+ #################################################
266
+
268
267
  ### set environment
269
268
  if defined? RAILS_ROOT
270
269
  RUBINSTEIN_HOME = RAILS_ROOT + '/lib/rubinstein' unless defined? RUBINSTEIN_HOME
@@ -274,7 +273,7 @@ else
274
273
  if ENV['RUBINSTEIN_HOME'] then
275
274
  RUBINSTEIN_HOME = ENV['RUBINSTEIN_HOME']
276
275
  else
277
- # using File.dirname causes /usr/bin/rake to break the system.
276
+ # File.dirname($0) will kill /usr/bin/rake
278
277
  RUBINSTEIN_HOME = File.dirname($0)
279
278
  end
280
279
  end
@@ -288,12 +287,19 @@ else
288
287
  end
289
288
  end
290
289
 
291
- ### load logger + Rubinstein libraries
292
- unless (( defined? @@log ) || ( defined? logger ))
293
- require RUBINSTEIN_HOME + '/lib/logger'
294
- load_logger
290
+ #################################################
291
+
292
# Requires the adapter library +s+ on a best-effort basis.
#
# Parameters:
# - 1 s (String; the name or path handed to require)
#
# A library that is missing, does not parse or raises while loading is
# reported to the log instead of aborting the caller. Returns the result of
# require on success, otherwise whatever the logger call returns.
def load_adapter s
  require s
rescue ScriptError, StandardError => e
  # ScriptError (LoadError, SyntaxError) is not a StandardError and has to be
  # named explicitly. Interrupts and exit requests are deliberately NOT
  # rescued, so Ctrl-C or exit inside an adapter still stops the process.
  @@log.error "Could not load adapter #{s}: #{e}"
end
296
299
 
300
+ #################################################
301
+ # load standard classes that need to be loaded at startup
302
+
297
303
  require 'rubygems'
298
304
 
299
305
  ### load the DRb server functions
@@ -301,9 +307,15 @@ require 'drb/drb'
301
307
  # require 'thread'
302
308
  # require 'gen'
303
309
 
310
+ ### load logger + Rubinstein libraries
311
+ unless (( defined? @@log ) || ( defined? logger ))
312
+ require RUBINSTEIN_HOME + '/lib/logger'
313
+ load_logger
314
+ end
315
+
316
+ # Load ActiveRDF and, immediately afterwards, replace its Namespace class with the patched version that supports various abstractions and URI rules.
304
317
  require 'active_rdf'
305
- # load the namespace patch to ActiveRDF
306
- require RUBIC_HOME + '/lib/objectmanager/namespace'
318
+ require RUBIC_HOME + '/objectmanager/namespace'
307
319
 
308
320
  # Namespace configuration
309
321
  require RUBINSTEIN_HOME + '/config/namespaces'
@@ -311,6 +323,8 @@ require RUBINSTEIN_HOME + '/config/namespaces'
311
323
  ### load helper methods
312
324
  require RUBINSTEIN_HOME + '/lib/helpers'
313
325
 
326
+ #################################################
327
+
314
328
  ### start ActiveRubic
315
329
  begin
316
330
 
@@ -324,10 +338,10 @@ begin
324
338
 
325
339
  ### load the base
326
340
  @@log.rubic " => loading ActiveRubinstein"
327
- require RUBIC_HOME + '/lib/active_rubinstein/base'
341
+ require RUBIC_HOME + '/active_rubinstein/base'
328
342
 
329
343
  @@log.rubic " => loading ActiveRubic"
330
- require RUBIC_HOME + '/lib/active_rubic/base'
344
+ require RUBIC_HOME + '/active_rubic/base'
331
345
 
332
346
  @@log.debug " => including ActiveRubinstein to ActiveRubic::Base"
333
347
  ActiveRubic::Base.class_eval do
@@ -339,11 +353,11 @@ begin
339
353
 
340
354
  ### load models
341
355
  @@log.rubic " * loading ActiveRDF extensions"
342
- require RUBIC_HOME + '/lib/objectmanager/rdfs/resource'
343
- require RUBIC_HOME + '/lib/objectmanager/rdfs/image' # should be in the schema
344
- require RUBIC_HOME + '/lib/objectmanager/owl/class'
345
- require RUBIC_HOME + '/lib/objectmanager/owl/thing'
346
- require RUBIC_HOME + '/lib/objectmanager/owl/object_property'
356
+ require RUBIC_HOME + '/objectmanager/rdfs/resource'
357
+ require RUBIC_HOME + '/objectmanager/rdfs/image' # should be in the schema
358
+ require RUBIC_HOME + '/objectmanager/owl/class'
359
+ require RUBIC_HOME + '/objectmanager/owl/thing'
360
+ require RUBIC_HOME + '/objectmanager/owl/object_property'
347
361
 
348
362
  ### load the backend libraries only when required
349
363
  unless @@in_daemon_mode then
@@ -362,4 +376,19 @@ rescue
362
376
  # raise ActiveRubic::ActiveRubicError, $!
363
377
  end
364
378
 
365
- # EOF
379
# Exception hierarchy of ActiveRubic: ActiveRubicError is the common
# ancestor (a StandardError); all other errors derive directly from it.
module ActiveRubic
  class ActiveRubicError < StandardError; end #:nodoc:

  class AdapterNotSpecifiedError < ActiveRubicError; end # :nodoc:

  class AdapterNotFoundError < ActiveRubicError; end # :nodoc:

  class ConnectionNotEstablishedError < ActiveRubicError; end #:nodoc:

  class ConnectionFailedError < ActiveRubicError; end #:nodoc:

  class RubicNotFoundError < ActiveRubicError; end #:nodoc:
end
393
+
394
+ # EOF
@@ -0,0 +1,357 @@
1
+ module ActiveRubic
2
+
3
+ # = Methods
4
+ #
5
+ # == find()
6
+ # accepts two types of queries
7
+ # - an RDFS::Resource object, with options that specify the requested value
8
+ # - action; a Ruby Symbol
9
+ #
10
+ # actions are defined in storage-specific objects:
11
+ # - ActiveRubinstein::Base,
12
+ # - ActiveRubinstein::SparqlFormulator
13
+ # - ActiveRubinstein::JenaQuery
14
+ # - ActiveRubinstein::LuceneSearcher
15
+ # - ActiveRubinstein::JosekiQuery
16
+ #
17
+ # == search()
18
+ # string query from Lucene indices
19
+ #
20
+ # == sparql()
21
+ # accepts a Ruby Query and executes it to storages that accept SPARQL
22
+ #
23
+ # EXAMPLES:
24
+ # - ActiveRubic::Base.find( subject, :label, :from => :jena )
25
+ # - this is the function called by RDFS::Resource.label
26
+ #
27
+ # - ActiveRubic::Base.sparql( Query.new.distinct( :gp ).where( :gp, GEO::point, KUOPIO::kuopion_museo ) )
28
+ # - this is the function called by RDFS::Resource.geopoint
29
+ #
30
+ #############################################################################
31
+ class Base
32
+
33
def initialize; end # :nodoc:

# Executed once while the class body is evaluated: inside Rails only a log
# line is written; elsewhere the rails_database library is required and a
# RailsDB instance is stored in @@rails_db.
if defined? RAILS_ROOT
  @@log.rubic " * Running in Rails environment"
else
  require RUBINSTEIN_HOME + '/lib/rails_database'
  @@log.rubic " * connecting to Rails database.."
  @@rails_db = RailsDB.new( 'precursor' )
  @@log.rubic "@@rails_db = #{@@rails_db.inspect}"
end
44
+
45
+ class << self # Class methods
46
+
47
+ public
48
+
49
+ # checks if the servers are alive, tries to re-initialize ActiveRubinstein once.
50
# Pings the Jena query server.
#
# Parameters:
# - 1 options (Hash; :reinit_rubinstein => true creates a new
#   ActiveRubinstein::Base before pinging)
#
# When the attempt fails and :reinit_rubinstein was not set, the check is
# repeated once with :reinit_rubinstein => true. When that attempt fails as
# well, ConnectionNotEstablishedError is raised with the original error.
def check_connection( options={} )
  ActiveRubinstein::Base.new() if options[ :reinit_rubinstein ]
  ActiveRubinstein::JenaQuery.server.ping
rescue
  # already re-initialized once: give up
  raise ConnectionNotEstablishedError, $! if options[ :reinit_rubinstein ]

  # first failure: try to re-initialize once
  check_connection( :reinit_rubinstein => true )
end
63
+
64
+ # Sets the Rubinstein query parameters.
65
+ #
66
+ # Parameters:
67
+ # - 1, lang (Hash; :lang => 'en')
68
# Applies query parameters to Rubinstein. Only :lang is read; without it
# nothing happens and nil is returned.
def set_query_options( options={} )
  lang = options[ :lang ]
  return nil unless lang

  # set the language by which Rubinstein queries.
  @@log.rubic "Using language #{lang} (#{lang.class})"
  ActiveRubinstein::Base.set_language( lang )
end
77
+
78
+ # The main method for queries.
79
+ #
80
+ # options:
81
+ # - :data_stores = Array
82
+ # - :suffice_to = the data store that is trusted to return sufficient results. By default no data store is trusted to be complete, and the results of all of them are combined. No further data stores are queried after the one given here.
83
+ # - :combine = Array of data stores that are queried and the results combined. If the trusted server returns results, trust that the contents is sufficient.
84
+ #
85
+ # EXAMPLES:
86
+ # - triple-based: find( RDFS::label, :subject => KUOPIO::kuopion_museo )
87
+ # - abstract: find( :images_of, :subject => KUOPIO::puijon_torni )
88
# Entry point for queries. A trailing Hash in +args+ is taken as the
# options; the first remaining argument (:all, an action Symbol or a
# resource) is handed to serial_query_handler together with the options.
def find(*args)
  options = extract_options_from_args!(args)
  # validate_find_options(options)
  # set_readonly_option!(options)
  @@log.rubic " * Find: #{args}, #{options}"

  # :all and every other first argument take the same path for now;
  # find_initial (:first) and find_from_resources are not wired in.
  serial_query_handler( args.first, options )
end
108
+
109
+ # Executes a SPARQL query on the backends that support this. calls serial_query_handler, which calls the query validator.
110
+ #
111
+ # EXAMPLES:
112
+ # - sparql( Query.new.distinct( :s ).where( :s, KUOPIO::isDesignedBy, TOIMIJAT::j_v_stromberg ) )
113
# Runs a SPARQL query through serial_query_handler.
#
# Parameters:
# - 1 query (Query)
# - 2 args (an optional trailing Hash is used as the options)
#
# When +query+ is not a Query an error is logged and the result of that
# logger call is returned; no query is executed.
def sparql( query, *args, &block )
  if query.is_a? Query
    options = extract_options_from_args!(args)
    @@log.rubic " * Received SPARQL; options: #{options}"
    @@log.query " * Sparql: #{query.to_sp}"
    serial_query_handler( query, options )
  else
    @@log.error "The first parameter must be a Query"
  end
end
123
+ alias :find_by_sparql :sparql
124
+ alias :query :sparql
125
+
126
+ # Counterpart to find(). Instead of resource-based queries, this method takes in strings and returns resources from Lucene indices.
127
+ #
128
+ # The indices are defined in Rubinstein/config/lucene_indices.rb
129
+ #
130
+ # Parameters:
131
+ # - 1 keywords (String)
132
+ # - 2 options (Hash;
133
+ # :index => :images,
134
+ # :limit => @@search_limit,
135
+ # :score_limit => 0.0,
136
+ # :original_keywords => search_string,
137
+ # :from => :lucene | :geolucene
138
# Keyword search. Marks the request as a :standard Lucene branch query
# (written into the given +options+ Hash) and delegates to
# ActiveRubinstein::LuceneSearcher.search, whose result is returned.
def search( keywords, options={} )
  @@log.rubic " * Search: #{keywords}, #{options}"

  options[ :lucene_branch ] = :standard
  ActiveRubinstein::LuceneSearcher.search( keywords, options )
end
144
+
145
+ private
146
+
147
+ # checks if the data source can run this query.
148
# Tells whether +backend+ can run +query+: the only rejected combination
# is a Query object sent to :lucene.
def backend_can_execute?( query, backend )
  query_for_lucene = query.is_a?( Query ) && backend == :lucene
  !query_for_lucene
end
155
+
156
+ # runs a query on data stores sequentially. a huge method and perhaps should be split.
157
# Runs +query+ against the data stores one after another and returns the
# combined results as a single Array (empty when nothing was found).
#
# Parameters:
# - 1 query (a Query, or the action/resource given to find())
# - 2 options (Hash):
#     :from       => data store Symbol or Array of them (default: RUBIC_DATASTORES)
#     :suffice_to => data store after which no further stores are queried
#                    (default: the last store); :first stops after the first
#                    store that was actually queried
#     :limit      => maximum number of results returned
#
# The effective list of stores is written back to +options+ as :data_stores.
# An error raised while querying is logged, and the results gathered up to
# that point are still processed and returned.
def serial_query_handler(query, options)
  # query the data_stores set at options, (all by default)
  data_stores = options[ :from ] || RUBIC_DATASTORES
  data_stores = [ data_stores ] unless data_stores.is_a? Array

  # save the data stores to options
  options.update( :data_stores => data_stores )

  limit = options[ :limit ]

  # If suffice_to is set, then do not begin any more data store queries after processing that storage.
  first_sufficient_storage = options[ :suffice_to ] || data_stores.last

  @@log.deep " => Querying data stores: #{data_stores.join(', ')}"
  @@log.deep " - Will suffice to data from #{first_sufficient_storage}" if data_stores.size > 1
  unless data_stores.include? first_sufficient_storage
    @@log.warn " ! #{first_sufficient_storage} was set to suffice but is not within the data store list."
  end

  ####################
  ### EXECUTION LOOP
  # loops each data store and collects the results per store.

  results = Hash.new

  ## TODO: handle TIMEOUT
  begin
    data_stores.each do |storage|
      # The capability check is made for the store at hand. Passing the whole
      # list (as was done before) could never match a single backend, so
      # queries a backend cannot run were dispatched to it anyway.
      next unless backend_can_execute?( query, storage )

      t1 = Time.now
      my_results = run_query( storage, query, options )

      if !my_results.nil? && my_results.size > 0
        results.update storage => my_results
        @@log.rubic " => Got #{my_results.size} results from #{storage} in #{Time.now-t1} sec"
      else
        @@log.rubic " => No results from #{storage} in #{Time.now-t1} sec"
      end

      # no further queries, this data storage contains enough information
      if (storage == first_sufficient_storage) || (first_sufficient_storage == :first)
        @@log.rubic " ! as #{storage} is sufficient, no further data storages are queried." unless storage == data_stores.last
        break
      end
    end
  rescue
    @@log.error $!
  end

  # merge results from the hash to a single array, dropping nils
  merge = Array.new
  results.each_value do |store_results|
    store_results.uniq.each do |resource|
      merge << resource unless resource.nil?
    end
  end

  # returning nil causes not-so-well designed models to crash :)
  if merge.empty?
    @@log.rubic " => No results"
    return Array.new
  end

  # merge arrays (flatten)
  # NOTE: running compact! here will cause nil results
  merge.flatten!

  @@log.deep " => #{merge.size} altogether"

  # only allow these objects in results.
  allowed_classes = [
    RDFS::Resource,
    OWL::Class,
    OWL::ObjectProperty,
    OWL::Thing,
    Image,
    String
  ]

  unfamiliar_classes = Array.new
  results = Array.new

  merge.each do |resource|
    if allowed_classes.include? resource.class
      results << resource
    else
      # unaccepted classes are collected just to inform the log reader.
      unfamiliar_classes << resource.class
    end
  end

  # remove duplicates
  results.uniq!

  # warning log
  if unfamiliar_classes.any?
    @@log.warn " -- #{unfamiliar_classes.uniq.join(', ')} class types in results"
    @@log.warn " -- Returning only #{allowed_classes.join(', ')}"
  end

  @@log.rubic " => TOTAL: #{results.size} unique results"

  # todo: insert limit into queries, this will do for now..
  if limit
    @@log.deep " => Returning with limit #{limit}"
    # NOTE(review): a limit of 0 yields the range 0..-1, i.e. all results - confirm this is intended
    return results[ 0 .. (limit-1) ]
  else
    @@log.deep " => Returning all #{results.size}, no limit"
    @@log.boulderdash results
    return results
  end
end
282
+
283
+
284
+ def find_initial(options) # :nodoc:
285
+ # Stub: the body is empty, so the method returns nil. Commented-out draft: options.update(:limit => 1) unless options[:include]
286
+ # Commented-out draft (continued): find_every(options).first
287
+ end
288
+
289
+ ########################################################################
290
+
291
+ # conducts the query to the correct backend handler.
292
+ #
293
+ # Parameters:
294
+ # - 1 storage (Symbol)
295
+ # - 2 query
296
+ # - 3 options (Hash)
297
# Dispatches +query+ to the handler of the given +storage+ and returns the
# handler's result; an unknown storage yields nil.
#
# For :lucene and :geolucene the matching :lucene_branch (:standard or
# :geolucene) is written into +options+ before LuceneSearcher is called.
def run_query( storage, query, options )
  @@log.rubic " => query: #{query} (#{query.class})"

  case storage
  when :mysql  then ActiveRubic::Cacher.read( query, options )
  when :jena   then ActiveRubinstein::JenaQuery.execute( query, options )
  when :joseki then ActiveRubinstein::JosekiQuery.execute( query, options )
  when :lucene, :geolucene
    branch = ( storage == :lucene ) ? :standard : :geolucene
    options.update :lucene_branch => branch
    ActiveRubinstein::LuceneSearcher.execute( query, options )
  end
end
319
+
320
+ protected
321
+
322
# Removes a trailing Hash from +args+ and returns it; returns an empty Hash
# (leaving +args+ untouched) when the last argument is not a Hash.
def extract_options_from_args!(args) #:nodoc:
  return {} unless args.last.is_a?(Hash)

  args.pop
end
325
+
326
+ end
327
+ end
328
+ end
329
+
330
+
331
class Array

  # Returns a new Array with the elements of this one in random order.
  # The receiver itself is not modified.
  def randomize
    pool = dup
    # for every element, draw one random element out of the shrinking copy
    map { pool.slice!( rand( pool.length ) ) }
  end

  # Puts the elements of this Array into random order in place and returns
  # the Array itself.
  def randomize!
    replace( randomize )
  end

end
348
+
349
+ #--
350
+ # ideas for future modules:
351
+ # ActiveRubinstein::Virtuoso
352
+ # ActiveRubinstein::MysqlCache
353
+ # ActiveRubinstein::Terra # environment, the 'terrain'
354
+ # ActiveRubinstein::Hidria # the Hidria spacefolk ;-)
355
+ # ? => wikipedia
356
+ #
357
+ #++