activerdf_rdflite 1.0 → 1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ desc "test and package gem"
14
14
  task :default => [:test, :package]
15
15
 
16
16
  # get ActiveRdfVersion from commandline
17
- ActiveRdfVersion = ENV['REL'] || '0.9.7'
17
+ ActiveRdfVersion = ENV['REL'] || '1.0'
18
18
  NAME="activerdf_rdflite"
19
19
  GEMNAME="#{NAME}-#{ActiveRdfVersion}.gem"
20
20
 
@@ -0,0 +1,43 @@
1
+ #class String
2
+ # alias _match match
3
+ # def match(*args)
4
+ # m = _match(args.first)
5
+ # if m && m.length > 1
6
+ # args[1..-1].each_with_index do |name, index|
7
+ # m.instance_eval "def #{name}; self[#{index+1}] end"
8
+ # end
9
+ # end
10
+ # m
11
+ # end
12
+ #end
13
+
14
# RDFLite adapter that can additionally fetch RDF data from the web into the
# local datastore. Registered with the ConnectionPool under the :fetching type.
class FetchingAdapter < RDFLite
  ConnectionPool.register_adapter(:fetching,self)

  # Fetches RDF data from the given url and adds it to the datastore, using the
  # source url (without its fragment identifier) as context identifier.
  #
  # Only urls that start with http:// are fetched; for anything else the method
  # returns nil without touching the datastore. Returns whatever add_ntriples
  # returns otherwise.
  #
  # The external `rapper` command is used for parsing and must be on the PATH.
  def fetch url
    # anchor the check: the url must *start* with http://, not merely contain it
    return unless url.match(%r{\Ahttp://})

    # remove fragment identifier from url
    hashidx = url.rindex('#')
    url = url[0...hashidx] unless hashidx.nil?

    $activerdflog.debug "fetching from #{url}"

    #model = Redland::Model.new
    #parser = Redland::Parser.new('rdfxml')
    #scan = Redland::Uri.new('http://feature.librdf.org/raptor-scanForRDF')
    #enable = Redland::Literal.new('1')
    #Redland::librdf_parser_set_feature(parser, scan.uri, enable.node)
    #parser.parse_into_model(model, url)
    #triples = Redland::Serializer.ntriples.model_to_string(nil, model)

    # Pass the command as an argument array so that no shell is involved: the
    # url comes from outside and must never be interpreted as shell syntax.
    triples = IO.popen(['rapper', '--scan', url]) { |io| io.read }
    lines = triples.split($/)
    $activerdflog.debug "found #{lines.size} triples"

    # hand over the individual lines (not the raw string): add_ntriples
    # iterates with #each, which String only supported in Ruby 1.8
    context = RDFS::Resource.new(url)
    add_ntriples(lines, context)
  end
end
@@ -9,3 +9,5 @@ end
9
9
  $: << File.dirname(File.expand_path(file))
10
10
 
11
11
  require 'rdflite'
12
+ require 'fetching'
13
+ require 'suggesting'
@@ -1,7 +1,3 @@
1
- # RDFLite is a lightweight RDF database on top of sqlite3. It can act as adapter
2
- # in ActiveRDF. It supports on-disk and in-memory usage, and allows keyword
3
- # search if ferret is installed.
4
- #
5
1
  # Author:: Eyal Oren
6
2
  # Copyright:: (c) 2005-2006 Eyal Oren
7
3
  # License:: LGPL
@@ -10,20 +6,23 @@ require 'sqlite3'
10
6
  require 'active_rdf'
11
7
  require 'federation/connection_pool'
12
8
 
13
- $log.info "loading RDFLite adapter"
9
+ $activerdflog.info "loading RDFLite adapter"
14
10
 
15
11
  begin
16
12
  require 'ferret'
17
13
  @@have_ferret = true
18
14
  rescue LoadError
19
- $log.info "Keyword search is disabled since we could not load Ferret. To
15
+ $activerdflog.info "Keyword search is disabled since we could not load Ferret. To
20
16
  enable, please do \"gem install ferret\""
21
17
  @@have_ferret = false
22
18
  end
23
19
 
20
+ # RDFLite is a lightweight RDF database on top of sqlite3. It can act as adapter
21
+ # in ActiveRDF. It supports on-disk and in-memory usage, and allows keyword
22
+ # search if ferret is installed.
24
23
  class RDFLite < ActiveRdfAdapter
25
24
  ConnectionPool.register_adapter(:rdflite,self)
26
- bool_accessor :keyword_search
25
+ bool_accessor :keyword_search, :reasoning
27
26
 
28
27
  # instantiates RDFLite database
29
28
  # available parameters:
@@ -31,7 +30,7 @@ class RDFLite < ActiveRdfAdapter
31
30
  # * :keyword => true/false (defaults to true)
32
31
  # * :pidx, :oidx, etc. => true/false (enable/disable these indices)
33
32
  def initialize(params = {})
34
- $log.info "initialised rdflite with params #{params.to_s}"
33
+ $activerdflog.info "initialised rdflite with params #{params.to_s}"
35
34
 
36
35
  @reads = true
37
36
  @writes = true
@@ -40,22 +39,17 @@ class RDFLite < ActiveRdfAdapter
40
39
  file = params[:location] || ':memory:'
41
40
  @db = SQLite3::Database.new(file)
42
41
 
43
- # we enable keyword unless the user specifies otherwise
44
- @keyword_search = if params[:keyword].nil?
45
- true
46
- else
47
- params[:keyword]
48
- end
49
-
50
- # we can only do keyword search if ferret is found
42
+ # enable keyword search by default, but only if ferret is found
43
+ @keyword_search = params[:keyword].nil? ? true : params[:keyword]
51
44
  @keyword_search &= @@have_ferret
52
- $log.debug "we #{keyword_search? ? "do" : "don't"} have keyword search"
45
+
46
+ @reasoning = params[:reasoning] || false
53
47
 
54
48
  if keyword_search?
55
49
  # we initialise the ferret index, either as a file or in memory
50
+ infos = Ferret::Index::FieldInfos.new
56
51
 
57
52
  # we setup the fields not to store object's contents
58
- infos = Ferret::Index::FieldInfos.new
59
53
  infos.add_field(:subject, :store => :yes, :index => :no, :term_vector => :no)
60
54
  infos.add_field(:object, :store => :no) #, :index => :omit_norms)
61
55
 
@@ -69,13 +63,11 @@ class RDFLite < ActiveRdfAdapter
69
63
  # turn off filesystem synchronisation for speed
70
64
  @db.synchronous = 'off'
71
65
 
72
- # create triples table. since triples are unique, inserted duplicates are
73
- @db.execute('create table if not exists triple(s,p,o, unique(s,p,o) on conflict ignore)')
66
+ # create triples table. ignores duplicated triples
67
+ @db.execute('create table if not exists triple(s,p,o,c, unique(s,p,o,c) on conflict ignore)')
74
68
 
75
69
  create_indices(params)
76
-
77
- $log.debug("opened connection to #{file}")
78
- $log.debug("database contains #{size} triples")
70
+ @db
79
71
  end
80
72
 
81
73
  # returns the number of triples in the datastore (incl. possible duplicates)
@@ -85,8 +77,8 @@ class RDFLite < ActiveRdfAdapter
85
77
 
86
78
  # returns all triples in the datastore
87
79
  def dump
88
- @db.execute('select s,p,o from triple') do |s,p,o|
89
- [s,p,o].join(' ')
80
+ @db.execute('select s,p,o,c from triple').collect do |s,p,o,c|
81
+ [s,p,o,c].join(' ')
90
82
  end
91
83
  end
92
84
 
@@ -95,37 +87,23 @@ class RDFLite < ActiveRdfAdapter
95
87
  @db.execute('delete from triple')
96
88
  end
97
89
 
98
- # deletes triple(s,p,o) from datastore
99
- # nil parameters match anything: delete(nil,nil,nil) will delete all triples
100
- def delete(s,p,o)
90
+ # deletes triple(s,p,o,c) from datastore
91
+ # symbol parameters match anything: delete(:s,:p,:o) will delete all triples
92
+ # you can specify a context to limit deletion to that context:
93
+ # delete(:s,:p,:o, 'http://context') will delete all triples with that context
94
+ def delete(s,p,o,c=nil)
101
95
  # convert input to internal format
102
- # leave nil input alone (we'll deal with it later)
103
- s = "<#{s.uri}>" unless s.nil?
104
- p = "<#{p.uri}>" unless p.nil?
105
- o = case o
106
- when RDFS::Resource
107
- "<#{o.uri}>"
108
- else
109
- "\"#{o.to_s}\""
110
- end unless o.nil?
111
-
112
- # construct where clause for deletion (for all non-nil input)
113
- where_clauses = []
114
- conditions = []
115
- unless s.nil?
116
- conditions << s
117
- where_clauses << 's = ?'
118
- end
96
+ quad = [s,p,o,c].collect {|r| internalise(r) }
119
97
 
120
- unless p.nil?
121
- conditions << p
122
- where_clauses << 'p = ?'
123
- end
124
-
125
- unless o.nil?
126
- conditions << o
127
- where_clauses << 'o = ?'
128
- end
98
+ # construct where clause for deletion (for all non-nil input)
99
+ where_clauses = []
100
+ conditions = []
101
+ quad.each_with_index do |r,i|
102
+ unless r.nil?
103
+ conditions << r
104
+ where_clauses << "#{SPOC[i]} = ?"
105
+ end
106
+ end
129
107
 
130
108
  # construct delete string
131
109
  ds = 'delete from triple'
@@ -133,28 +111,29 @@ class RDFLite < ActiveRdfAdapter
133
111
 
134
112
  # execute delete string with possible deletion conditions (for each
135
113
  # non-empty where clause)
114
+ $activerdflog.debug("deleting #{[s,p,o,c].join(' ')}")
136
115
  @db.execute(ds, *conditions)
137
- $log.debug(sprintf("sending delete query: #{ds}", *conditions))
138
116
 
139
117
  # delete literal from ferret index
140
118
  @ferret.search_each("subject:\"#{s}\", object:\"#{o}\"") do |idx, score|
141
- $log.debug "deleting #{o} => #{s} from ferret index"
142
119
  @ferret.delete(idx)
143
120
  end if keyword_search?
121
+
122
+ @db
144
123
  end
145
124
 
146
125
  # adds triple(s,p,o) to datastore
147
126
  # s,p must be resources, o can be primitive data or resource
148
- def add(s,p,o)
127
+ def add(s,p,o,c=nil)
149
128
  # check illegal input
150
129
  raise(ActiveRdfError, "adding non-resource #{s}") unless s.respond_to?(:uri)
151
130
  raise(ActiveRdfError, "adding non-resource #{p}") unless p.respond_to?(:uri)
152
131
 
153
132
  # get internal representation (array)
154
- triple = internal_triple_representation(s,p,o)
133
+ quad = [s,p,o,c].collect {|r| internalise(r) }
155
134
 
156
135
  # add triple to database
157
- add_internal(*triple)
136
+ add_internal(@db,*quad)
158
137
  end
159
138
 
160
139
  # flushes outstanding changes to underlying sqlite3
@@ -167,38 +146,70 @@ class RDFLite < ActiveRdfAdapter
167
146
  # loads triples from file in ntriples format
168
147
  def load(file)
169
148
  ntriples = File.readlines(file)
149
+ $activerdflog.debug "read #{ntriples.size} triples from file #{file}"
170
150
 
171
- @db.transaction do
151
+ context = "<file:#{file}>"
152
+ add_ntriples(ntriples, context)
153
+ end
154
+
155
+ # adds string of ntriples from given context to database
156
+ def add_ntriples(ntriples, context=nil)
157
+ # convert context to internal format if RDFS::Resource
158
+ context = internalise(context)
159
+
160
+ # need unique identifier for this batch of triples (to detect occurrences of
161
+ # the same bnode, e.g. _:#1, within one batch)
162
+ uuid = `uuidgen`
163
+
164
+ # add each triple to db
165
+ @db.transaction do |tr|
172
166
  ntriples.each do |triple|
173
167
  nodes = triple.scan(Node)
174
- add_internal(nodes[0], nodes[1], nodes[2])
168
+
169
+ # handle bnodes if necessary (bnodes need to have uri generated)
170
+ subject = case nodes[0]
171
+ when BNode
172
+ "<http://www.activerdf.org/bnode/#$1/#{uuid}>"
173
+ else
174
+ nodes[0]
175
+ end
176
+
177
+ predicate = nodes[1]
178
+
179
+ # handle bnodes and literals if necessary (literals need unicode fixing)
180
+ object = case nodes[2]
181
+ when BNode
182
+ "<http://www.activerdf.org/bnode/#$1/#{uuid}>"
183
+ when Literal
184
+ fix_unicode(nodes[2])
185
+ else
186
+ nodes[2]
187
+ end
188
+
189
+ add_internal(tr, subject, predicate, object, context)
175
190
  end
176
191
  end
177
192
 
178
- $log.debug "read #{ntriples.size} triples from file #{file}"
179
193
  @db
180
194
  end
181
195
 
182
196
  # executes ActiveRDF query on datastore
183
197
  def query(query)
184
- # log received query
185
- $log.debug "received query: #{query.to_sp}"
186
-
187
198
  # construct query clauses
188
- sql = translate(query)
199
+ sql, conditions = translate(query)
189
200
 
190
201
  # executing query, passing all where-clause values as parameters (so that
191
202
  # sqlite will encode quotes correctly)
192
- constraints = @right_hand_sides.collect { |value| value.to_s }
193
-
194
- $log.debug format("executing: #{sql.gsub('?','"%s"')}", *constraints)
203
+ #constraints = right_hand_sides.collect { |value| value.to_s }
195
204
 
196
205
  # executing query
197
- results = @db.execute(sql, *constraints)
206
+ results = @db.execute(sql, *conditions)
198
207
 
199
208
  # if ASK query, we check whether we received a positive result count
200
209
  if query.ask?
201
- return [results[0][0].to_i > 0]
210
+ return [[results[0][0].to_i > 0]]
211
+ elsif query.count?
212
+ return [[results[0][0].to_i]]
202
213
  else
203
214
  # otherwise we convert results to ActiveRDF nodes and return them
204
215
  return wrap(query, results)
@@ -207,22 +218,23 @@ class RDFLite < ActiveRdfAdapter
207
218
 
208
219
  # translates ActiveRDF query into internal sqlite query string
209
220
  def translate(query)
210
- construct_select(query) + construct_join(query) + construct_where(query) +
211
- construct_limit(query)
221
+ where, conditions = construct_where(query)
222
+ [construct_select(query) + construct_join(query) + where + construct_sort(query) + construct_limit(query), conditions]
212
223
  end
213
224
 
214
225
  private
215
226
  # constants for extracting resources/literals from sql results
227
+ BNode = /_:(\S*)/
216
228
  Resource = /<([^>]*)>/
217
229
  Literal = /"([^"]*)"/
218
- Node = Regexp.union(/<[^>]*>/,/"[^"]*"/)
219
- SPO = ['s','p','o']
230
+ Node = Regexp.union(/_:\S*/,/<[^>]*>/,/"[^"]*"/)
231
+ SPOC = ['s','p','o','c']
220
232
 
221
233
  # adds s,p,o into sqlite and ferret
222
234
  # s,p,o should be in internal format: <uri> and "literal"
223
- def add_internal(s,p,o)
235
+ def add_internal(db, s, p, o, c)
224
236
  # insert the triple into the datastore
225
- @db.execute('insert into triple values (?,?,?)', s,p,o)
237
+ db.execute('insert into triple values (?,?,?,?)', s,p,o,c)
226
238
 
227
239
  # if keyword-search available, insert the object into keyword search
228
240
  @ferret << {:subject => s, :object => o} if keyword_search?
@@ -262,6 +274,16 @@ class RDFLite < ActiveRdfAdapter
262
274
  clause
263
275
  end
264
276
 
277
# Builds the (optional) SQL "order by" clause for a query.
#
# query:: object whose sort_clauses lists the terms to sort on
#
# Returns an empty string when the query has no sort clauses; otherwise a
# string starting with a space, e.g. " order by t0.s,t1.o", ready to be
# appended to the rest of the SQL statement.
def construct_sort(query)
  return "" if query.sort_clauses.empty?

  sort = query.sort_clauses.collect do |term|
    variable_name(query, term)
  end

  # no parentheses around the column list: "order by (a,b)" is read as a row
  # value by sqlite and rejected as soon as there is more than one sort term
  " order by #{sort.join(',')}"
end
286
+
265
287
  # construct join clause
266
288
  # TODO: joins don't work this way, they have to be linear (in one direction
267
289
  # only, and we should only alias tables we didnt alias yet)
@@ -298,8 +320,8 @@ class RDFLite < ActiveRdfAdapter
298
320
 
299
321
  # construct t0,t1,... as aliases for term
300
322
  # and construct join condition, e.g. t0.s
301
- termalias = "t#{index / 3}"
302
- termjoin = "#{termalias}.#{SPO[index % 3]}"
323
+ termalias = "t#{index / 4}"
324
+ termjoin = "#{termalias}.#{SPOC[index % 4]}"
303
325
 
304
326
  join = if join_stmt.include?(termalias)
305
327
  ""
@@ -313,8 +335,8 @@ class RDFLite < ActiveRdfAdapter
313
335
 
314
336
  # construct t0,t1, etc. as aliases for buddy,
315
337
  # and construct join condition, e.g. t0.s = t1.p
316
- buddyalias = "t#{i/3}"
317
- buddyjoin = "#{buddyalias}.#{SPO[i%3]}"
338
+ buddyalias = "t#{i/4}"
339
+ buddyjoin = "#{buddyalias}.#{SPOC[i%4]}"
318
340
 
319
341
  # TODO: fix reuse of same table names as aliases, e.g.
320
342
  # "from triple as t1 join triple as t2 on ... join t1 on ..."
@@ -345,20 +367,22 @@ class RDFLite < ActiveRdfAdapter
345
367
  # collecting all the right-hand sides of where clauses (e.g. where name =
346
368
  # 'abc'), to add to query string later using ?-notation, because then
347
369
  # sqlite will automatically encode quoted literals correctly
348
- @right_hand_sides = []
370
+ right_hand_sides = []
349
371
 
350
372
  # convert each where clause to SQL:
351
373
  # add where clause for each subclause, except if it's a variable
352
374
  query.where_clauses.each_with_index do |clause,level|
375
+ raise ActiveRdfError, "where clause #{clause} is not a triple" unless clause.is_a?(Array)
353
376
  clause.each_with_index do |subclause, i|
354
377
  # dont add where clause for variables
355
378
  unless subclause.is_a?(Symbol)
356
- where << "t#{level}.#{SPO[i]} = ?"
357
- @right_hand_sides << case subclause
358
- when RDFS::Resource
359
- "<#{subclause.uri}>"
379
+ conditions = compute_where_condition(i, subclause, query.reasoning? && reasoning?)
380
+ if conditions.size == 1
381
+ where << "t#{level}.#{SPOC[i]} = ?"
382
+ right_hand_sides << conditions.first
360
383
  else
361
- subclause.to_s
384
+ conditions = conditions.collect {|c| "'#{c}'"}
385
+ where << "t#{level}.#{SPOC[i]} in (#{conditions.join(',')})"
362
386
  end
363
387
  end
364
388
  end
@@ -371,18 +395,53 @@ class RDFLite < ActiveRdfAdapter
371
395
  raise ActiveRdfError, "cannot do keyword search over multiple subjects" if select_subject.size > 1
372
396
 
373
397
  keywords = query.keywords.collect {|subj,key| key}
374
- @ferret.search_each("object:\"#{keywords}\"") do |idx,score|
398
+ @ferret.search_each("object:#{keywords}") do |idx,score|
375
399
  subjects << @ferret[idx][:subject]
376
400
  end
377
401
  subjects.uniq! if query.distinct?
378
402
  where << "#{variable_name(query,select_subject.first)} in (#{subjects.collect {'?'}.join(',')})"
379
- @right_hand_sides += subjects
403
+ right_hand_sides += subjects
380
404
  end
381
405
 
382
406
  if where.empty?
383
- ''
407
+ ['',[]]
408
+ else
409
+ ["where " + where.join(' and '), right_hand_sides]
410
+ end
411
+ end
412
+
413
# Computes the right-hand side value(s) for one position of a where clause.
#
# index::     position of the subclause within the clause
#             (0 = subject, 1 = predicate, 2 = object, 3 = context)
# subclause:: the value given at that position
# reasoning:: when true, predicates are expanded with their subproperties
#
# Returns an array of strings: values that respond to #uri are rendered as
# "<uri>", everything else through #to_s. Without reasoning (or for any
# position other than the predicate) the array has exactly one element.
def compute_where_condition(index, subclause, reasoning)
  conditions = [subclause]

  # The only rdfs rule applied is subproperty expansion on the predicate
  # position; subjects, objects and contexts are never expanded.
  if reasoning && index == 1 && subclause.respond_to?(:uri)
    conditions = subproperties(subclause)
  end

  # convert conditions into internal format
  # NOTE(review): non-resource values are rendered without surrounding quotes
  # here, unlike internalise -- confirm this matches what callers pass in.
  conditions.collect { |c| c.respond_to?(:uri) ? "<#{c.uri}>" : c.to_s }
end
434
+
435
+ def subproperties(resource)
436
+ subproperty = Namespace.lookup(:rdfs,:subPropertyOf)
437
+ children_query = Query.new.distinct(:sub).where(:sub, subproperty, resource)
438
+ children_query.reasoning = false
439
+ children = children_query.execute
440
+
441
+ if children.empty?
442
+ [resource]
384
443
  else
385
- "where " + where.join(' and ')
444
+ [resource] + children.collect{|c| subproperties(c)}.flatten.compact
386
445
  end
387
446
  end
388
447
 
@@ -412,28 +471,31 @@ class RDFLite < ActiveRdfAdapter
412
471
  end
413
472
  end
414
473
 
415
- termtable = "t#{index / 3}"
416
- termspo = SPO[index % 3]
474
+ termtable = "t#{index / 4}"
475
+ termspo = SPOC[index % 4]
417
476
  return "#{termtable}.#{termspo}"
418
477
  end
419
478
 
420
479
  # wrap resources into ActiveRDF resources, literals into Strings
421
480
  def wrap(query, results)
422
481
  results.collect do |row|
423
- row.collect do |result|
424
- case result
425
- when Resource
426
- RDFS::Resource.new($1)
427
- when Literal
428
- String.new($1)
429
- else
430
- # when we do a count(*) query we get a number, not a resource/literal
431
- results
432
- end
433
- end
482
+ row.collect { |result| parse(result) }
483
+ end
484
+ end
485
+
486
+ def parse(result)
487
+ case result
488
+ when Resource
489
+ RDFS::Resource.new($1)
490
+ when Literal
491
+ String.new($1)
492
+ else
493
+ # when we do a count(*) query we get a number, not a resource/literal
494
+ result
434
495
  end
435
496
  end
436
497
 
498
+
437
499
  def create_indices(params)
438
500
  sidx = params[:sidx] || false
439
501
  pidx = params[:pidx] || false
@@ -457,15 +519,21 @@ class RDFLite < ActiveRdfAdapter
457
519
 
458
520
  # transform triple into internal format <uri> and "literal"
459
521
  # returns array [s,p,o]
460
- def internal_triple_representation(s,p,o)
461
- s = "<#{s.uri}>"
462
- p = "<#{p.uri}>"
463
- o = case o
464
- when RDFS::Resource
465
- "<#{o.uri}>"
466
- else
467
- "\"#{o.to_s}\""
468
- end
469
- [s,p,o]
522
+ def internalise(r)
523
+ if r.respond_to?(:uri)
524
+ "<#{r.uri}>"
525
+ elsif r.is_a?(Symbol)
526
+ nil
527
+ else
528
+ "\"#{r.to_s}\""
529
+ end
470
530
  end
531
+
532
# Replaces escaped unicode characters of the form \uXXXX (backslash, 'u' and
# exactly four hex digits) in the given string by the character they denote.
#
# str:: string possibly containing \uXXXX escape sequences
#
# Returns a new string; text without such escapes is returned unchanged.
def fix_unicode(str)
  # Decode in a single pass. Going through an intermediate "U+XXXX" marker
  # would also rewrite literal text that happens to contain "U+XXXX".
  str.gsub(/\\u([0-9a-fA-F]{4})/) { [$1.hex].pack('U') }
end
537
+
538
+ public :subproperties
471
539
  end
@@ -0,0 +1,73 @@
1
+ require 'pp'
2
# Adapter that suggests predicates for a resource, based on how often
# predicates occur and co-occur on the subjects in the datastore.
# Registered with the ConnectionPool under the :suggesting type.
class SuggestingAdapter < FetchingAdapter
  ConnectionPool.register_adapter(:suggesting,self)

  # Initialises the underlying adapter with the given params and (re)creates
  # the two sql views the suggestions are computed from:
  # * occurrence:   number of distinct subjects per predicate
  # * cooccurrence: number of distinct subjects per pair of different predicates
  def initialize(params = {})
    super(params)
    @db.execute('drop view if exists occurrence')
    @db.execute('create view occurrence as select p, count(distinct s) as count from triple group by p')

    @db.execute('drop view if exists cooccurrence')
    @db.execute('create view cooccurrence as select t0.p as p1,t1.p as p2, count(distinct t0.s) as count from triple as t0 join triple as t1 on t0.s=t1.s and t0.p!=t1.p group by t0.p, t1.p')
  end

  # Suggests predicates for the given resource.
  #
  # resource:: object responding to direct_predicates
  #
  # Returns an array of [candidate, score] pairs. Candidates are the
  # predicates that co-occur with every predicate of the resource that is
  # used by more than one subject, minus the predicates the resource already
  # has. The array is empty when no predicate of the resource qualifies.
  def suggest(resource)
    $activerdflog.debug "starting suggestions for #{size} triples"
    time = Time.now

    own_predicates = resource.direct_predicates

    construct_occurrence_matrix
    construct_cooccurrence_matrix

    # only predicates shared with at least one other subject are informative
    predicates = own_predicates.select { |p| occurrence(p) > 1 }

    # fetch all predicates co-occurring with our predicates
    candidates = predicates.collect { |p| cooccurring(p) }

    # perform set intersection; inject yields nil for an empty list, in which
    # case there is nothing to suggest
    candidates = (candidates.inject { |intersect, n| intersect & n } || []).flatten
    candidates = candidates - own_predicates

    suggestions = candidates.collect do |candidate|
      # starting from 1.0 keeps the whole computation in floating point
      score = predicates.inject(1.0) do |product, p|
        product * cooccurrence(candidate, p) / occurrence(p)
      end
      [candidate, score]
    end
    $activerdflog.debug "suggestions for #{resource} took #{Time.now-time}s"
    suggestions
  end

  # Loads the occurrence view into @occurrence (predicate => count), keeping
  # only predicates with a count greater than one.
  def construct_occurrence_matrix
    @occurrence = {}
    @db.execute('select * from occurrence where count > 1') do |p,count|
      @occurrence[parse(p)] = count.to_i
    end
  end

  # Loads the cooccurrence view into @cooccurrence
  # (predicate => { other predicate => count }).
  def construct_cooccurrence_matrix
    @cooccurrence = {}
    @db.execute('select * from cooccurrence') do |p1, p2, count|
      row = (@cooccurrence[parse(p1)] ||= {})
      row[parse(p2)] = count.to_i
    end
  end

  # Returns the occurrence count of the predicate, or 0 if it is not in the
  # occurrence matrix.
  def occurrence(predicate)
    @occurrence[predicate] || 0
  end

  # Returns how often p1 and p2 co-occur, or 0 if either is unknown.
  def cooccurrence(p1, p2)
    (@cooccurrence[p1] || {})[p2] || 0
  end

  # Returns all predicates co-occurring with the given predicate (an empty
  # array if the predicate is unknown).
  def cooccurring(predicate)
    (@cooccurrence[predicate] || {}).keys
  end
end
data/test/test_data.nt CHANGED
@@ -27,3 +27,6 @@
27
27
  <http://www.w3.org/1999/02/22-rdf-syntax-ns#Property> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Resource> .
28
28
  <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#domain> <http://www.w3.org/2000/01/rdf-schema#Resource> .
29
29
  <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#range> <http://www.w3.org/2000/01/rdf-schema#Class> .
30
+ _:#1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://activerdf.org/test/Person> .
31
+ _:#1 <http://activerdf.org/test/age> "29" .
32
+ _:#1 <http://activerdf.org/test/name> "Another Person" .
data/test/test_rdflite.rb CHANGED
@@ -92,8 +92,45 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
92
92
  def test_loading_data
93
93
  adapter = ConnectionPool.add_data_source :type => :rdflite
94
94
  adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')
95
- assert_equal 29, adapter.size
96
- assert_equal 29, Query.new.count(:s).where(:s,:p,:o).execute.to_i
95
+ assert_equal 32, adapter.size
96
+ end
97
+
98
+ def test_count_query
99
+ adapter = ConnectionPool.add_data_source :type => :rdflite
100
+ adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')
101
+ assert_kind_of Fixnum, Query.new.count(:s).where(:s,:p,:o).execute
102
+ assert_equal 32, Query.new.count(:s).where(:s,:p,:o).execute
103
+ end
104
+
105
+ def test_single_context
106
+ adapter = ConnectionPool.add_data_source :type => :rdflite
107
+ file = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'
108
+ adapter.load(file)
109
+
110
+ context = Query.new.distinct(:c).where(:s,:p,:o,:c).execute
111
+ assert_instance_of RDFS::Resource, context
112
+ assert_equal RDFS::Resource.new("file:#{file}"), context
113
+ end
114
+
115
+ def test_multiple_context
116
+ adapter = ConnectionPool.add_data_source :type => :rdflite
117
+ file = File.dirname(File.expand_path(__FILE__)) + '/test_data.nt'
118
+ adapter.load(file)
119
+ file_context = RDFS::Resource.new("file:#{file}")
120
+
121
+ eyal = RDFS::Resource.new 'eyaloren.org'
122
+ age = RDFS::Resource.new 'foaf:age'
123
+ test = RDFS::Resource.new 'test'
124
+ adapter.add(eyal, age, test)
125
+
126
+ context = Query.new.distinct(:c).where(:s,:p,:o,:c).execute
127
+ assert_equal file_context, context[0]
128
+ assert_equal '', context[1]
129
+
130
+ n1 = Query.new.distinct(:s).where(:s,:p,:o,'').execute(:flatten => false)
131
+ n2 = Query.new.distinct(:s).where(:s,:p,:o,file_context).execute(:flatten => false)
132
+ assert_equal 1, n1.size
133
+ assert_equal 9, n2.size
97
134
  end
98
135
 
99
136
  def test_person_data
@@ -119,11 +156,11 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
119
156
  def test_delete_data
120
157
  adapter = ConnectionPool.add_data_source :type => :rdflite
121
158
  adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')
122
- assert_equal 29, adapter.size
159
+ assert_equal 32, adapter.size
123
160
 
124
161
  eyal = RDFS::Resource.new('http://activerdf.org/test/eyal')
125
162
  adapter.delete(eyal, nil, nil)
126
- assert_equal 24, adapter.size
163
+ assert_equal 27, adapter.size
127
164
 
128
165
  adapter.delete(nil,nil,nil)
129
166
  assert_equal 0, adapter.size
@@ -136,7 +173,17 @@ class TestRdfLiteAdapter < Test::Unit::TestCase
136
173
  eyal = RDFS::Resource.new('http://activerdf.org/test/eyal')
137
174
  assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"blue").execute
138
175
  assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"27").execute
139
-
140
176
  assert_equal eyal, Query.new.distinct(:s).where(:s,:keyword,"eyal oren").execute
141
177
  end
178
+
179
+ def test_bnodes
180
+ adapter = ConnectionPool.add_data_source :type => :rdflite
181
+ adapter.load(File.dirname(File.expand_path(__FILE__)) + '/test_data.nt')
182
+
183
+ Namespace.register(:test, 'http://activerdf.org/test/')
184
+ ObjectManager.construct_classes
185
+ assert_equal 2, TEST::Person.find_all.size
186
+ assert_equal 29, TEST::Person.find_all[1].age.to_i
187
+ assert_equal "Another Person", TEST::Person.find_all[1].name
188
+ end
142
189
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: activerdf_rdflite
5
5
  version: !ruby/object:Gem::Version
6
- version: "1.0"
7
- date: 2006-11-02 00:00:00 +00:00
6
+ version: "1.1"
7
+ date: 2006-12-08 00:00:00 +00:00
8
8
  summary: an RDF database for usage in ActiveRDF (based on sqlite3)
9
9
  require_paths:
10
10
  - lib
@@ -37,6 +37,8 @@ files:
37
37
  - lib/activerdf_rdflite
38
38
  - lib/activerdf_rdflite/init.rb
39
39
  - lib/activerdf_rdflite/rdflite.rb
40
+ - lib/activerdf_rdflite/fetching.rb
41
+ - lib/activerdf_rdflite/suggesting.rb
40
42
  test_files: []
41
43
 
42
44
  rdoc_options: []