activerdf_rdflite 1.0 → 1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ desc "test and package gem"
14
14
  task :default => [:test, :package]
15
15
 
16
16
  # get ActiveRdfVersion from commandline
17
- ActiveRdfVersion = ENV['REL'] || '0.9.7'
17
+ ActiveRdfVersion = ENV['REL'] || '1.0'
18
18
  NAME="activerdf_rdflite"
19
19
  GEMNAME="#{NAME}-#{ActiveRdfVersion}.gem"
20
20
 
@@ -0,0 +1,43 @@
1
+ #class String
2
+ # alias _match match
3
+ # def match(*args)
4
+ # m = _match(args.first)
5
+ # if m && m.length > 1
6
+ # args[1..-1].each_with_index do |name, index|
7
+ # m.instance_eval "def #{name}; self[#{index+1}] end"
8
+ # end
9
+ # end
10
+ # m
11
+ # end
12
+ #end
13
+
14
# Adapter that extends RDFLite with the ability to fetch RDF data from the
# web by running the external `rapper` command-line tool.
class FetchingAdapter < RDFLite
  ConnectionPool.register_adapter(:fetching,self)

  # Fetches RDF data from the given url and adds it to the datastore, using
  # the source url (without fragment identifier) as context identifier.
  #
  # url:: String; anything that does not start with "http://" is ignored
  #       and nil is returned.
  #
  # Returns whatever add_ntriples returns.
  def fetch url
    # anchored, so that the value handed to rapper really is an http url and
    # can never be interpreted as a command-line option
    return unless url =~ /\Ahttp:\/\//

    # remove fragment identifier from url
    hashidx = url.rindex('#')
    url = url[0...hashidx] unless hashidx.nil?

    $activerdflog.debug "fetching from #{url}"

    # run rapper without a shell: the url is passed as a separate argument,
    # so quotes or shell metacharacters in it cannot inject commands
    triples = IO.popen(['rapper', '--scan', url]) { |io| io.read }
    lines = triples.split($/)
    $activerdflog.debug "found #{lines.size} triples"

    # hand over the array of lines (not the raw string), so that add_ntriples
    # can iterate it with #each on every Ruby version
    context = RDFS::Resource.new(url)
    add_ntriples(lines, context)
  end
end
@@ -9,3 +9,5 @@ end
9
9
  $: << File.dirname(File.expand_path(file))
10
10
 
11
11
  require 'rdflite'
12
+ require 'fetching'
13
+ require 'suggesting'
@@ -1,7 +1,3 @@
1
- # RDFLite is a lightweight RDF database on top of sqlite3. It can act as adapter
2
- # in ActiveRDF. It supports on-disk and in-memory usage, and allows keyword
3
- # search if ferret is installed.
4
- #
5
1
  # Author:: Eyal Oren
6
2
  # Copyright:: (c) 2005-2006 Eyal Oren
7
3
  # License:: LGPL
@@ -10,20 +6,23 @@ require 'sqlite3'
10
6
  require 'active_rdf'
11
7
  require 'federation/connection_pool'
12
8
 
13
- $log.info "loading RDFLite adapter"
9
+ $activerdflog.info "loading RDFLite adapter"
14
10
 
15
11
  begin
16
12
  require 'ferret'
17
13
  @@have_ferret = true
18
14
  rescue LoadError
19
- $log.info "Keyword search is disabled since we could not load Ferret. To
15
+ $activerdflog.info "Keyword search is disabled since we could not load Ferret. To
20
16
  enable, please do \"gem install ferret\""
21
17
  @@have_ferret = false
22
18
  end
23
19
 
20
+ # RDFLite is a lightweight RDF database on top of sqlite3. It can act as adapter
21
+ # in ActiveRDF. It supports on-disk and in-memory usage, and allows keyword
22
+ # search if ferret is installed.
24
23
  class RDFLite < ActiveRdfAdapter
25
24
  ConnectionPool.register_adapter(:rdflite,self)
26
- bool_accessor :keyword_search
25
+ bool_accessor :keyword_search, :reasoning
27
26
 
28
27
  # instantiates RDFLite database
29
28
  # available parameters:
@@ -31,7 +30,7 @@ class RDFLite < ActiveRdfAdapter
31
30
  # * :keyword => true/false (defaults to true)
32
31
  # * :pidx, :oidx, etc. => true/false (enable/disable these indices)
33
32
  def initialize(params = {})
34
- $log.info "initialised rdflite with params #{params.to_s}"
33
+ $activerdflog.info "initialised rdflite with params #{params.to_s}"
35
34
 
36
35
  @reads = true
37
36
  @writes = true
@@ -40,22 +39,17 @@ class RDFLite < ActiveRdfAdapter
40
39
  file = params[:location] || ':memory:'
41
40
  @db = SQLite3::Database.new(file)
42
41
 
43
- # we enable keyword unless the user specifies otherwise
44
- @keyword_search = if params[:keyword].nil?
45
- true
46
- else
47
- params[:keyword]
48
- end
49
-
50
- # we can only do keyword search if ferret is found
42
+ # enable keyword search by default, but only if ferret is found
43
+ @keyword_search = params[:keyword].nil? ? true : params[:keyword]
51
44
  @keyword_search &= @@have_ferret
52
- $log.debug "we #{keyword_search? ? "do" : "don't"} have keyword search"
45
+
46
+ @reasoning = params[:reasoning] || false
53
47
 
54
48
  if keyword_search?
55
49
  # we initialise the ferret index, either as a file or in memory
50
+ infos = Ferret::Index::FieldInfos.new
56
51
 
57
52
  # we setup the fields not to store object's contents
58
- infos = Ferret::Index::FieldInfos.new
59
53
  infos.add_field(:subject, :store => :yes, :index => :no, :term_vector => :no)
60
54
  infos.add_field(:object, :store => :no) #, :index => :omit_norms)
61
55
 
@@ -69,13 +63,11 @@ class RDFLite < ActiveRdfAdapter
69
63
  # turn off filesystem synchronisation for speed
70
64
  @db.synchronous = 'off'
71
65
 
72
- # create triples table. since triples are unique, inserted duplicates are
73
- @db.execute('create table if not exists triple(s,p,o, unique(s,p,o) on conflict ignore)')
66
+ # create triples table. ignores duplicated triples
67
+ @db.execute('create table if not exists triple(s,p,o,c, unique(s,p,o,c) on conflict ignore)')
74
68
 
75
69
  create_indices(params)
76
-
77
- $log.debug("opened connection to #{file}")
78
- $log.debug("database contains #{size} triples")
70
+ @db
79
71
  end
80
72
 
81
73
  # returns the number of triples in the datastore (incl. possible duplicates)
@@ -85,8 +77,8 @@ class RDFLite < ActiveRdfAdapter
85
77
 
86
78
  # returns all triples in the datastore
87
79
  def dump
88
- @db.execute('select s,p,o from triple') do |s,p,o|
89
- [s,p,o].join(' ')
80
+ @db.execute('select s,p,o,c from triple').collect do |s,p,o,c|
81
+ [s,p,o,c].join(' ')
90
82
  end
91
83
  end
92
84
 
@@ -95,37 +87,23 @@ class RDFLite < ActiveRdfAdapter
95
87
  @db.execute('delete from triple')
96
88
  end
97
89
 
98
- # deletes triple(s,p,o) from datastore
99
- # nil parameters match anything: delete(nil,nil,nil) will delete all triples
100
- def delete(s,p,o)
90
+ # deletes triple(s,p,o,c) from datastore
91
+ # symbol parameters match anything: delete(:s,:p,:o) will delete all triples
92
+ # you can specify a context to limit deletion to that context:
93
+ # delete(:s,:p,:o, 'http://context') will delete all triples with that context
94
+ def delete(s,p,o,c=nil)
101
95
  # convert input to internal format
102
- # leave nil input alone (we'll deal with it later)
103
- s = "<#{s.uri}>" unless s.nil?
104
- p = "<#{p.uri}>" unless p.nil?
105
- o = case o
106
- when RDFS::Resource
107
- "<#{o.uri}>"
108
- else
109
- "\"#{o.to_s}\""
110
- end unless o.nil?
111
-
112
- # construct where clause for deletion (for all non-nil input)
113
- where_clauses = []
114
- conditions = []
115
- unless s.nil?
116
- conditions << s
117
- where_clauses << 's = ?'
118
- end
96
+ quad = [s,p,o,c].collect {|r| internalise(r) }
119
97
 
120
- unless p.nil?
121
- conditions << p
122
- where_clauses << 'p = ?'
123
- end
124
-
125
- unless o.nil?
126
- conditions << o
127
- where_clauses << 'o = ?'
128
- end
98
+ # construct where clause for deletion (for all non-nil input)
99
+ where_clauses = []
100
+ conditions = []
101
+ quad.each_with_index do |r,i|
102
+ unless r.nil?
103
+ conditions << r
104
+ where_clauses << "#{SPOC[i]} = ?"
105
+ end
106
+ end
129
107
 
130
108
  # construct delete string
131
109
  ds = 'delete from triple'
@@ -133,28 +111,29 @@ class RDFLite < ActiveRdfAdapter
133
111
 
134
112
  # execute delete string with possible deletion conditions (for each
135
113
  # non-empty where clause)
114
+ $activerdflog.debug("deleting #{[s,p,o,c].join(' ')}")
136
115
  @db.execute(ds, *conditions)
137
- $log.debug(sprintf("sending delete query: #{ds}", *conditions))
138
116
 
139
117
  # delete literal from ferret index
140
118
  @ferret.search_each("subject:\"#{s}\", object:\"#{o}\"") do |idx, score|
141
- $log.debug "deleting #{o} => #{s} from ferret index"
142
119
  @ferret.delete(idx)
143
120
  end if keyword_search?
121
+
122
+ @db
144
123
  end
145
124
 
146
125
  # adds triple(s,p,o) to datastore
147
126
  # s,p must be resources, o can be primitive data or resource
148
- def add(s,p,o)
127
+ def add(s,p,o,c=nil)
149
128
  # check illegal input
150
129
  raise(ActiveRdfError, "adding non-resource #{s}") unless s.respond_to?(:uri)
151
130
  raise(ActiveRdfError, "adding non-resource #{p}") unless p.respond_to?(:uri)
152
131
 
153
132
  # get internal representation (array)
154
- triple = internal_triple_representation(s,p,o)
133
+ quad = [s,p,o,c].collect {|r| internalise(r) }
155
134
 
156
135
  # add triple to database
157
- add_internal(*triple)
136
+ add_internal(@db,*quad)
158
137
  end
159
138
 
160
139
  # flushes outstanding changes to underlying sqlite3
@@ -167,38 +146,70 @@ class RDFLite < ActiveRdfAdapter
167
146
  # loads triples from file in ntriples format
168
147
  def load(file)
169
148
  ntriples = File.readlines(file)
149
+ $activerdflog.debug "read #{ntriples.size} triples from file #{file}"
170
150
 
171
- @db.transaction do
151
+ context = "<file:#{file}>"
152
+ add_ntriples(ntriples, context)
153
+ end
154
+
155
+ # adds string of ntriples from given context to database
156
# Adds ntriples from the given context to the database.
#
# ntriples:: either one String holding all triples (one per line) or an
#            enumerable of lines
# context::  optional context for all added triples; converted to internal
#            format with internalise
#
# Blank nodes are rewritten into generated URIs that are unique per call, so
# that the same bnode label within one batch maps to the same URI. Lines that
# do not contain a complete triple (blank lines, comments) are skipped.
#
# Returns the database handle.
def add_ntriples(ntriples, context=nil)
  # loaded here because the file's require block is not visible from this method
  require 'securerandom'

  # convert context to internal format if RDFS::Resource
  context = internalise(context)

  # need unique identifier for this batch of triples (to detect occurrence of
  # same bnodes _:#1); generated in-process, so no external command is needed
  # and no trailing newline ends up inside the generated URIs
  uuid = SecureRandom.uuid

  # accept both a single string and a list of lines
  lines = ntriples.respond_to?(:each_line) ? ntriples.each_line : ntriples

  # add each triple to db
  @db.transaction do |tr|
    lines.each do |triple|
      nodes = triple.scan(Node)

      # skip lines without a complete triple instead of inserting NULLs
      next if nodes.size < 3

      subject, predicate, object = nodes

      # handle bnodes if necessary (bnodes need to have uri generated); the
      # pattern is anchored so that only real bnodes are rewritten, not URIs
      # or literals that merely contain "_:"
      if subject =~ /\A_:(\S*)\z/
        subject = "<http://www.activerdf.org/bnode/#{$1}/#{uuid}>"
      end

      # handle bnodes and literals if necessary (literals need unicode fixing)
      object = case object
               when /\A_:(\S*)\z/
                 "<http://www.activerdf.org/bnode/#{$1}/#{uuid}>"
               when Literal
                 fix_unicode(object)
               else
                 object
               end

      add_internal(tr, subject, predicate, object, context)
    end
  end

  @db
end
181
195
 
182
196
  # executes ActiveRDF query on datastore
183
197
  def query(query)
184
- # log received query
185
- $log.debug "received query: #{query.to_sp}"
186
-
187
198
  # construct query clauses
188
- sql = translate(query)
199
+ sql, conditions = translate(query)
189
200
 
190
201
  # executing query, passing all where-clause values as parameters (so that
191
202
  # sqlite will encode quotes correctly)
192
- constraints = @right_hand_sides.collect { |value| value.to_s }
193
-
194
- $log.debug format("executing: #{sql.gsub('?','"%s"')}", *constraints)
203
+ #constraints = right_hand_sides.collect { |value| value.to_s }
195
204
 
196
205
  # executing query
197
- results = @db.execute(sql, *constraints)
206
+ results = @db.execute(sql, *conditions)
198
207
 
199
208
  # if ASK query, we check whether we received a positive result count
200
209
  if query.ask?
201
- return [results[0][0].to_i > 0]
210
+ return [[results[0][0].to_i > 0]]
211
+ elsif query.count?
212
+ return [[results[0][0].to_i]]
202
213
  else
203
214
  # otherwise we convert results to ActiveRDF nodes and return them
204
215
  return wrap(query, results)
@@ -207,22 +218,23 @@ class RDFLite < ActiveRdfAdapter
207
218
 
208
219
  # translates ActiveRDF query into internal sqlite query string
209
220
  def translate(query)
210
- construct_select(query) + construct_join(query) + construct_where(query) +
211
- construct_limit(query)
221
+ where, conditions = construct_where(query)
222
+ [construct_select(query) + construct_join(query) + where + construct_sort(query) + construct_limit(query), conditions]
212
223
  end
213
224
 
214
225
  private
215
226
  # constants for extracting resources/literals from sql results
227
+ BNode = /_:(\S*)/
216
228
  Resource = /<([^>]*)>/
217
229
  Literal = /"([^"]*)"/
218
- Node = Regexp.union(/<[^>]*>/,/"[^"]*"/)
219
- SPO = ['s','p','o']
230
+ Node = Regexp.union(/_:\S*/,/<[^>]*>/,/"[^"]*"/)
231
+ SPOC = ['s','p','o','c']
220
232
 
221
233
  # adds s,p,o into sqlite and ferret
222
234
  # s,p,o should be in internal format: <uri> and "literal"
223
- def add_internal(s,p,o)
235
+ def add_internal(db, s, p, o, c)
224
236
  # insert the triple into the datastore
225
- @db.execute('insert into triple values (?,?,?)', s,p,o)
237
+ db.execute('insert into triple values (?,?,?,?)', s,p,o,c)
226
238
 
227
239
  # if keyword-search available, insert the object into keyword search
228
240
  @ferret << {:subject => s, :object => o} if keyword_search?
@@ -262,6 +274,16 @@ class RDFLite < ActiveRdfAdapter
262
274
  clause
263
275
  end
264
276
 
277
+ # sort query results on variable clause (optionally)
278
# Constructs the (optional) sort clause of the sql query.
#
# Returns an empty string if the query has no sort clauses; otherwise
# " order by " followed by the comma-separated column names of the sort
# variables, as computed by variable_name.
def construct_sort(query)
  return "" if query.sort_clauses.empty?

  sort = query.sort_clauses.collect do |term|
    variable_name(query, term)
  end

  # no parentheses around the list: "order by (a,b)" is not a list of sort
  # terms in sqlite and fails as soon as there is more than one term
  " order by #{sort.join(',')}"
end
286
+
265
287
  # construct join clause
266
288
  # TODO: joins don't work this way, they have to be linear (in one direction
267
289
  # only, and we should only alias tables we didnt alias yet)
@@ -298,8 +320,8 @@ class RDFLite < ActiveRdfAdapter
298
320
 
299
321
  # construct t0,t1,... as aliases for term
300
322
  # and construct join condition, e.g. t0.s
301
- termalias = "t#{index / 3}"
302
- termjoin = "#{termalias}.#{SPO[index % 3]}"
323
+ termalias = "t#{index / 4}"
324
+ termjoin = "#{termalias}.#{SPOC[index % 4]}"
303
325
 
304
326
  join = if join_stmt.include?(termalias)
305
327
  ""
@@ -313,8 +335,8 @@ class RDFLite < ActiveRdfAdapter
313
335
 
314
336
  # construct t0,t1, etc. as aliases for buddy,
315
337
  # and construct join condition, e.g. t0.s = t1.p
316
- buddyalias = "t#{i/3}"
317
- buddyjoin = "#{buddyalias}.#{SPO[i%3]}"
338
+ buddyalias = "t#{i/4}"
339
+ buddyjoin = "#{buddyalias}.#{SPOC[i%4]}"
318
340
 
319
341
  # TODO: fix reuse of same table names as aliases, e.g.
320
342
  # "from triple as t1 join triple as t2 on ... join t1 on ..."
@@ -345,20 +367,22 @@ class RDFLite < ActiveRdfAdapter
345
367
  # collecting all the right-hand sides of where clauses (e.g. where name =
346
368
  # 'abc'), to add to query string later using ?-notation, because then
347
369
  # sqlite will automatically encode quoted literals correctly
348
- @right_hand_sides = []
370
+ right_hand_sides = []
349
371
 
350
372
  # convert each where clause to SQL:
351
373
  # add where clause for each subclause, except if it's a variable
352
374
  query.where_clauses.each_with_index do |clause,level|
375
+ raise ActiveRdfError, "where clause #{clause} is not a triple" unless clause.is_a?(Array)
353
376
  clause.each_with_index do |subclause, i|
354
377
  # dont add where clause for variables
355
378
  unless subclause.is_a?(Symbol)
356
- where << "t#{level}.#{SPO[i]} = ?"
357
- @right_hand_sides << case subclause
358
- when RDFS::Resource
359
- "<#{subclause.uri}>"
379
+ conditions = compute_where_condition(i, subclause, query.reasoning? && reasoning?)
380
+ if conditions.size == 1
381
+ where << "t#{level}.#{SPOC[i]} = ?"
382
+ right_hand_sides << conditions.first
360
383
  else
361
- subclause.to_s
384
+ conditions = conditions.collect {|c| "'#{c}'"}
385
+ where << "t#{level}.#{SPOC[i]} in (#{conditions.join(',')})"
362
386
  end
363
387
  end
364
388
  end
@@ -371,18 +395,53 @@ class RDFLite < ActiveRdfAdapter
371
395
  raise ActiveRdfError, "cannot do keyword search over multiple subjects" if select_subject.size > 1
372
396
 
373
397
  keywords = query.keywords.collect {|subj,key| key}
374
- @ferret.search_each("object:\"#{keywords}\"") do |idx,score|
398
+ @ferret.search_each("object:#{keywords}") do |idx,score|
375
399
  subjects << @ferret[idx][:subject]
376
400
  end
377
401
  subjects.uniq! if query.distinct?
378
402
  where << "#{variable_name(query,select_subject.first)} in (#{subjects.collect {'?'}.join(',')})"
379
- @right_hand_sides += subjects
403
+ right_hand_sides += subjects
380
404
  end
381
405
 
382
406
  if where.empty?
383
- ''
407
+ ['',[]]
408
+ else
409
+ ["where " + where.join(' and '), right_hand_sides]
410
+ end
411
+ end
412
+
413
# Computes the right-hand side value(s) of one where condition.
#
# index::     position of the term in the clause (0=s, 1=p, 2=o, 3=c)
# subclause:: the term itself (a resource or a literal value)
# reasoning:: whether rdfs reasoning should be applied
#
# With reasoning enabled, a predicate (index 1) that is a resource is
# expanded to itself plus everything returned by subproperties. There are no
# rules for subjects, objects and contexts.
#
# Returns an array of values in internal format: "<uri>" for everything that
# responds to uri, to_s for everything else.
def compute_where_condition(index, subclause, reasoning)
  conditions = if reasoning && index == 1 && subclause.respond_to?(:uri)
                 # expand properties to include all subproperties
                 subproperties(subclause)
               else
                 [subclause]
               end

  # convert conditions into internal format
  conditions.collect { |c| c.respond_to?(:uri) ? "<#{c.uri}>" : c.to_s }
end
434
+
435
# Returns the given property followed by all its direct and indirect
# subproperties (everything connected to it through rdfs:subPropertyOf).
#
# resource:: the property to expand
# seen::     properties that were already expanded; used internally to stop
#            at cycles (e.g. a property declared subPropertyOf itself), which
#            would otherwise recurse forever. Callers normally omit it.
#
# The lookup query runs with reasoning disabled, so that it does not trigger
# subproperty expansion itself.
def subproperties(resource, seen = [])
  return [] if seen.include?(resource)
  seen << resource

  subproperty = Namespace.lookup(:rdfs,:subPropertyOf)
  children_query = Query.new.distinct(:sub).where(:sub, subproperty, resource)
  children_query.reasoning = false
  children = children_query.execute

  [resource] + children.collect { |c| subproperties(c, seen) }.flatten.compact
end
388
447
 
@@ -412,28 +471,31 @@ class RDFLite < ActiveRdfAdapter
412
471
  end
413
472
  end
414
473
 
415
- termtable = "t#{index / 3}"
416
- termspo = SPO[index % 3]
474
+ termtable = "t#{index / 4}"
475
+ termspo = SPOC[index % 4]
417
476
  return "#{termtable}.#{termspo}"
418
477
  end
419
478
 
420
479
  # wrap resources into ActiveRDF resources, literals into Strings
421
480
  def wrap(query, results)
422
481
  results.collect do |row|
423
- row.collect do |result|
424
- case result
425
- when Resource
426
- RDFS::Resource.new($1)
427
- when Literal
428
- String.new($1)
429
- else
430
- # when we do a count(*) query we get a number, not a resource/literal
431
- results
432
- end
433
- end
482
+ row.collect { |result| parse(result) }
483
+ end
484
+ end
485
+
486
+ def parse(result)
487
+ case result
488
+ when Resource
489
+ RDFS::Resource.new($1)
490
+ when Literal
491
+ String.new($1)
492
+ else
493
+ # when we do a count(*) query we get a number, not a resource/literal
494
+ result
434
495
  end
435
496
  end
436
497
 
498
+
437
499
  def create_indices(params)
438
500
  sidx = params[:sidx] || false
439
501
  pidx = params[:pidx] || false
@@ -457,15 +519,21 @@ class RDFLite < ActiveRdfAdapter
457
519
 
458
520
  # transform triple into internal format <uri> and "literal"
459
521
  # returns the internal string for one term: <uri> or "literal" (nil for a Symbol)
460
- def internal_triple_representation(s,p,o)
461
- s = "<#{s.uri}>"
462
- p = "<#{p.uri}>"
463
- o = case o
464
- when RDFS::Resource
465
- "<#{o.uri}>"
466
- else
467
- "\"#{o.to_s}\""
468
- end
469
- [s,p,o]
522
+ def internalise(r)
523
+ if r.respond_to?(:uri)
524
+ "<#{r.uri}>"
525
+ elsif r.is_a?(Symbol)
526
+ nil
527
+ else
528
+ "\"#{r.to_s}\""
529
+ end
470
530
  end
531
+
532
+ # fixes unicode characters in literals (because we parse them wrongly somehow)
533
# Replaces the unicode escape sequences of the ntriples format by the
# characters they stand for: \uXXXX (four hex digits) and \UXXXXXXXX (eight
# hex digits).
#
# The replacement is done in a single pass, so ordinary text that happens to
# look like "U+0041" is left untouched.
#
# str:: the string (typically a literal in internal format) to fix
#
# Returns a new string; str itself is not modified.
def fix_unicode(str)
  str.gsub(/\\u([0-9a-fA-F]{4})|\\U([0-9a-fA-F]{8})/) do
    [($1 || $2).hex].pack('U')
  end
end
537
+
538
+ public :subproperties
471
539
  end
@@ -0,0 +1,73 @@
1
+ require 'pp'
2
# Adapter that suggests additional predicates for a resource, based on how
# often predicates occur together on the same subject in the datastore.
class SuggestingAdapter < FetchingAdapter
  ConnectionPool.register_adapter(:suggesting,self)

  # Instantiates the adapter (same parameters as the parent adapter) and
  # (re)creates two views on the triple table:
  # * occurrence: number of distinct subjects per predicate
  # * cooccurrence: number of distinct subjects per pair of different predicates
  def initialize(params = {})
    super(params)
    @db.execute('drop view if exists occurrence')
    @db.execute('create view occurrence as select p, count(distinct s) as count from triple group by p')

    @db.execute('drop view if exists cooccurrence')
    @db.execute('create view cooccurrence as select t0.p as p1,t1.p as p2, count(distinct t0.s) as count from triple as t0 join triple as t1 on t0.s=t1.s and t0.p!=t1.p group by t0.p, t1.p')
  end

  # Suggests predicates for the given resource.
  #
  # Only the resource's own predicates that occur on more than one subject
  # are taken into account. Candidates are the predicates that co-occur with
  # all of those, minus the predicates the resource already has.
  #
  # Returns an array of [candidate, score] pairs, where score is the product
  # over the considered predicates p of cooccurrence(candidate, p) /
  # occurrence(p). Returns an empty array if there is nothing to go on.
  def suggest(resource)
    $activerdflog.debug "starting suggestions for #{size} triples"
    time = Time.now

    own_predicates = resource.direct_predicates

    construct_occurrence_matrix
    construct_cooccurrence_matrix

    predicates = own_predicates.select { |p| occurrence(p) > 1 }

    # fetch all predicates co-occurring with our predicates and intersect
    # them; inject yields nil for an empty list, which means no candidates
    cooccurring_sets = predicates.collect { |p| cooccurring(p) }
    candidates = (cooccurring_sets.inject { |intersect, n| intersect & n } || []).flatten
    candidates = candidates - own_predicates

    suggestions = candidates.collect do |candidate|
      score = predicates.inject(1.0) do |product, p|
        product * cooccurrence(candidate, p) / occurrence(p)
      end
      [candidate, score]
    end
    $activerdflog.debug "suggestions for #{resource} took #{Time.now-time}s"
    suggestions
  end

  # Reads the occurrence view into a hash: predicate => number of subjects
  # (only for predicates used by more than one subject).
  def construct_occurrence_matrix
    @occurrence = {}
    @db.execute('select * from occurrence where count > 1') do |p,count|
      @occurrence[parse(p)] = count.to_i
    end
  end

  # Reads the cooccurrence view into a nested hash:
  # predicate1 => { predicate2 => number of shared subjects }.
  def construct_cooccurrence_matrix
    @cooccurrence = {}
    @db.execute('select * from cooccurrence') do |p1, p2, count|
      first = parse(p1)
      @cooccurrence[first] ||= {}
      @cooccurrence[first][parse(p2)] = count.to_i
    end
  end

  # Number of subjects using the predicate (0 if unknown).
  def occurrence(predicate)
    @occurrence[predicate] || 0
  end

  # Number of subjects using both predicates (0 if they never co-occur or if
  # p1 is unknown).
  def cooccurrence(p1, p2)
    (@cooccurrence[p1] || {})[p2] || 0
  end

  # All predicates that co-occur with the given predicate (empty if none).
  def cooccurring(predicate)
    (@cooccurrence[predicate] || {}).keys
  end
end
data/test/test_data.nt CHANGED
@@ -27,3 +27,6 @@
27
27
  <http://www.w3.org/1999/02/22-rdf-syntax-ns#Property> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Resource> .
28
28
  <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#domain> <http://www.w3.org/2000/01/rdf-schema#Resource> .
29
29
  <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#range> <http://www.w3.org/2000/01/rdf-schema#Class> .
30
+ _:#1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://activerdf.org/test/Person> .
31
+ _:#1 <http://activerdf.org/test/age> "29" .
32
+ _:#1 <http://activerdf.org/test/name> "Another Person" .
data/test/test_rdflite.rb CHANGED
@@ -92,8 +92,45 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
92
92
  def test_loading_data
93
93
  adapter = ConnectionPool.add_data_source :type => :rdflite
94
94
  adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')
95
- assert_equal 29, adapter.size
96
- assert_equal 29, Query.new.count(:s).where(:s,:p,:o).execute.to_i
95
+ assert_equal 32, adapter.size
96
+ end
97
+
98
# A count query must return the number of triples as a plain integer.
def test_count_query
  adapter = ConnectionPool.add_data_source :type => :rdflite
  adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')

  count = Query.new.count(:s).where(:s,:p,:o).execute

  # Integer instead of Fixnum: Fixnum is an Integer on old rubies and no
  # longer exists on current ones
  assert_kind_of Integer, count
  assert_equal 32, count
end
104
+
105
+ def test_single_context
106
+ adapter = ConnectionPool.add_data_source :type => :rdflite
107
+ file = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'
108
+ adapter.load(file)
109
+
110
+ context = Query.new.distinct(:c).where(:s,:p,:o,:c).execute
111
+ assert_instance_of RDFS::Resource, context
112
+ assert_equal RDFS::Resource.new("file:#{file}"), context
113
+ end
114
+
115
+ def test_multiple_context
116
+ adapter = ConnectionPool.add_data_source :type => :rdflite
117
+ file = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'
118
+ adapter.load(file)
119
+ file_context = RDFS::Resource.new("file:#{file}")
120
+
121
+ eyal = RDFS::Resource.new 'eyaloren.org'
122
+ age = RDFS::Resource.new 'foaf:age'
123
+ test = RDFS::Resource.new 'test'
124
+ adapter.add(eyal, age, test)
125
+
126
+ context = Query.new.distinct(:c).where(:s,:p,:o,:c).execute
127
+ assert_equal file_context, context[0]
128
+ assert_equal '', context[1]
129
+
130
+ n1 = Query.new.distinct(:s).where(:s,:p,:o,'').execute(:flatten => false)
131
+ n2 = Query.new.distinct(:s).where(:s,:p,:o,file_context).execute(:flatten => false)
132
+ assert_equal 1, n1.size
133
+ assert_equal 9, n2.size
97
134
  end
98
135
 
99
136
  def test_person_data
@@ -119,11 +156,11 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
119
156
  def test_delete_data
120
157
  adapter = ConnectionPool.add_data_source :type => :rdflite
121
158
  adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')
122
- assert_equal 29, adapter.size
159
+ assert_equal 32, adapter.size
123
160
 
124
161
  eyal = RDFS::Resource.new('http://activerdf.org/test/eyal')
125
162
  adapter.delete(eyal, nil, nil)
126
- assert_equal 24, adapter.size
163
+ assert_equal 27, adapter.size
127
164
 
128
165
  adapter.delete(nil,nil,nil)
129
166
  assert_equal 0, adapter.size
@@ -136,7 +173,17 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
136
173
  eyal = RDFS::Resource.new('http://activerdf.org/test/eyal')
137
174
  assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"blue").execute
138
175
  assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"27").execute
139
-
140
176
  assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"eyal oren").execute
141
177
  end
178
+
179
+ def test_bnodes
180
+ adapter = ConnectionPool.add_data_source :type => :rdflite
181
+ adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')
182
+
183
+ Namespace.register(:test, 'http://activerdf.org/test/')
184
+ ObjectManager.construct_classes
185
+ assert_equal 2, TEST::Person.find_all.size
186
+ assert_equal 29, TEST::Person.find_all[1].age.to_i
187
+ assert_equal "Another Person", TEST::Person.find_all[1].name
188
+ end
142
189
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: activerdf_rdflite
5
5
  version: !ruby/object:Gem::Version
6
- version: "1.0"
7
- date: 2006-11-02 00:00:00 +00:00
6
+ version: "1.1"
7
+ date: 2006-12-08 00:00:00 +00:00
8
8
  summary: an RDF database for usage in ActiveRDF (based on sqlite3)
9
9
  require_paths:
10
10
  - lib
@@ -37,6 +37,8 @@ files:
37
37
  - lib/activerdf_rdflite
38
38
  - lib/activerdf_rdflite/init.rb
39
39
  - lib/activerdf_rdflite/rdflite.rb
40
+ - lib/activerdf_rdflite/fetching.rb
41
+ - lib/activerdf_rdflite/suggesting.rb
40
42
  test_files: []
41
43
 
42
44
  rdoc_options: []