graffiti 2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,568 @@
1
+ # Graffiti RDF Store
2
+ # (originally written for Samizdat project)
3
+ #
4
+ # Copyright (c) 2002-2011 Dmitry Borodaenko <angdraug@debian.org>
5
+ #
6
+ # This program is free software.
7
+ # You can distribute/modify this program under the terms of
8
+ # the GNU General Public License version 3 or later.
9
+ #
10
+ # see doc/rdf-storage.txt for introduction and Graffiti Squish definition;
11
+ # see doc/storage-impl.txt for explanation of implemented algorithms
12
+ #
13
+ # vim: et sw=2 sts=2 ts=8 tw=0
14
+
15
+ require 'graffiti/exceptions'
16
+ require 'graffiti/sql_mapper'
17
+
18
+ module Graffiti
19
+
20
+ # parse Squish query and translate triples to relational conditions
21
+ #
22
+ # provides access to internal representation of the parsed query and utility
23
+ # functions to deal with Squish syntax
24
+ #
25
+ class SquishQuery
26
+ include Debug
27
+
28
# internal resource reference: a non-empty run of decimal digits (captured)
INTERNAL = /\A([[:digit:]]+)\z/.freeze

# blank node: '?' mark followed by the node name (captured)
BN = /\A\?([[:alnum:]_]+)\z/.freeze

# blank node occurrences inside a longer string
BN_SCAN = /\?[[:alnum:]_]+?\b/.freeze

# parametrized value: ':' followed by the parameter name (captured)
PARAMETER = /\A:([[:alnum:]_]+)\z/.freeze

# placeholder that replaces a string literal: quoted index (captured)
LITERAL = /\A'(\d+)'\z/.freeze

# string literal placeholders inside a longer string
LITERAL_SCAN = /'(\d+)'/.freeze

# query parameters (capture 1) and literal placeholders (capture 2) inside
# a longer string
PARAMETER_AND_LITERAL_SCAN = /\B:([[:alnum:]_]+)|'(\d+)'/.freeze

# integer or decimal number with optional leading minus
NUMBER = /\A-?[[:digit:]]+(\.[[:digit:]]+)?\z/.freeze

# operators and keywords accepted in expressions (case-insensitive)
OPERATOR = /\A(\+|-|\*|\/|\|\||<|<=|>|>=|=|!=|@@|to_tsvector|to_tsquery|I?LIKE|NOT|AND|OR|IN|IS|NULL)\z/i.freeze

# aggregate function names accepted in expressions (case-insensitive)
AGGREGATE = /\A(avg|count|max|min|sum)\z/i.freeze

# whole Squish query; captures in order: key, nodes, pattern, EXCEPT,
# OPTIONAL, LITERAL, GROUP BY, ORDER BY expression, ORDER BY direction, USING
QUERY = /\A\s*(SELECT|INSERT|UPDATE)\b\s*(.*?)\s*
  \bWHERE\b\s*(.*?)\s*
  (?:\bEXCEPT\b\s*(.*?))?\s*
  (?:\bOPTIONAL\b\s*(.*?))?\s*
  (?:\bLITERAL\b\s*(.*?))?\s*
  (?:\bGROUP\s+BY\b\s*(.*?))?\s*
  (?:\bORDER\s+BY\b\s*(.*?)\s*(ASC|DESC)?)?\s*
  (?:\bUSING\b\s*(.*?))?\s*\z/mix.freeze
66
+
67
# extract common Squish query sections, perform namespace substitution,
# generate query pattern graph, validate SQL expressions and check that all
# variables can be bound
#
# _query_ is either a String with the query source, or a Hash with a
# pre-parsed query (keys :nodes, :pattern, :negative, :optional, :strings),
# in which case no parsing or validation is performed
#
# raises ProgrammingError when _query_ is nil or of unsupported class, when
# the query doesn't match the QUERY regexp, or when the USING section can't
# be split into prefix/uri pairs
#
def initialize(config, query)
  query.nil? and raise ProgrammingError, "SquishQuery: query can't be nil"

  if query.kind_of? Hash   # pre-parsed query (used by SquishAssert)
    @nodes = query[:nodes]
    @pattern = query[:pattern]
    @negative = query[:negative]
    @optional = query[:optional]
    @strings = query[:strings]
    # three separate String objects: a shared one would let an in-place
    # change of one section show up in the other two
    @literal, @group, @order = '', '', ''
    @sql_mapper = SqlMapper.new(config, @pattern)
    return
  elsif not query.kind_of? String
    raise ProgrammingError,
      "Bad query initialization parameter class: #{query.class}"
  end

  debug { query }
  @query = query   # keep original string
  query = query.dup

  # replace string literals with 'n' placeholders (also see #substitute_literals)
  @strings = []
  query.gsub!(/'((?:''|[^'])*)'/m) do
    @strings.push $1.gsub("''", "'")   # keep unescaped string
    "'" + (@strings.size - 1).to_s + "'"
  end

  match = QUERY.match(query) or raise ProgrammingError,
    "Malformed query: are keywords SELECT, INSERT, UPDATE or WHERE missing?"
  # sections that are absent from the query come out as empty strings
  _whole, @key, @nodes, @pattern, @negative, @optional, @literal,
    @group, @order, @order_dir, @ns = match.to_a.collect {|m| m.to_s }
  @key.upcase!
  @order_dir.upcase!

  # namespaces
  if @ns.empty? or /\APRESET\s+NS\z/i =~ @ns
    # keyword is matched case-insensitively, same as all other keywords
    @ns = config.ns
  else
    # drop FOR/AS/AND only when they are standalone words, so that the same
    # letters inside a namespace URI are left alone
    tokens = @ns.scan(/\S+/).reject {|token| /\A(?:FOR|AS|AND)\z/i =~ token }
    tokens.size.even? or raise ProgrammingError,
      "Malformed USING section: list of 'prefix FOR uri' pairs expected"
    @ns = Hash[*tokens]
  end

  @pattern = parse_pattern(@pattern)
  @optional = parse_pattern(@optional)
  @negative = parse_pattern(@negative)

  # validate SQL expressions
  validate_expression(@literal)
  @group.split(/\s*,\s*/).each {|group| validate_expression(group) }
  validate_expression(@order)

  @sql_mapper = SqlMapper.new(
    config, @pattern, @negative, @optional, @literal)

  # check that all variables can be bound
  @variables = query.scan(BN_SCAN)
  @variables.each {|node| @sql_mapper.bind(node) }
end
128
+
129
attr_reader :nodes,       # blank variables control section
            :pattern,     # query pattern graph as array of triples [ [p, s, o], ... ]
            :literal,     # literal SQL expression
            :group,       # SQL GROUP BY expression
            :order,       # SQL order expression
            :order_dir,   # direction of order, ASC or DESC
            :ns,          # query namespaces mapping
            :variables    # list of variables defined in the query

# original query string as it was passed in for parsing
#
def to_s
  @query
end
158
+
159
# put quoted and escaped query string literals back in place of their 'n'
# placeholders (see #new, #LITERAL); anything that is not a String is
# returned as is
#
def substitute_literals(s)
  if s.kind_of? String
    s.gsub(LITERAL_SCAN) { get_literal_value(Regexp.last_match(1).to_i) }
  else
    s
  end
end
167
+
168
# replace schema uri with namespace prefix, modifying _uriref_ in place
#
# _uri_ is matched literally (regexp metacharacters such as '.', '?' and '+'
# that are common in URIs have no special meaning); the remainder of
# _uriref_ after _uri_ must be non-empty and free of '/' and '#'
#
# returns modified _uriref_, or nil when _uriref_ was left unchanged
#
def self.uri_shrink!(uriref, prefix, uri)
  uriref.gsub!(/\A#{Regexp.escape(uri.to_s)}([^\/#]+)\z/) { "#{prefix}::#{$1}" }
end

# replace schema uri with a prefix from a supplied namespaces hash
# (prefix => uri); first namespace that matches wins
#
# returns a new string, _uriref_ itself is not modified; returns nil when
# _uriref_ is nil
#
def self.ns_shrink(uriref, namespaces)
  return nil unless uriref

  shrunk = uriref.dup
  namespaces.each {|prefix, uri| break if uri_shrink!(shrunk, prefix, uri) }
  shrunk
end

# replace schema uri with a prefix from query namespaces
#
def ns_shrink(uriref)
  SquishQuery.ns_shrink(uriref, @ns)
end
187
+
188
# validate expression
#
# expression := value [ operator expression ]
#
# value := blank_node | literal_string | number | '(' expression ')'
#
# the string is split on whitespace, parentheses and commas, and each
# resulting token has to be a blank node, a parameter, a literal
# placeholder, a number, an operator or an aggregate function name
#
# returns _string_ unchanged, raises ProgrammingError on the first bad token
#
def validate_expression(string)
  # todo: lexical analyser
  acceptable = [BN, PARAMETER, LITERAL, NUMBER, OPERATOR, AGGREGATE]

  string.split(/[\s(),]+/).each do |token|
    next if '' == token   # left over when the string starts with a separator
    next if acceptable.any? {|regexp| regexp === token }

    raise ProgrammingError, "Bad token '#{token}' in expression"
  end

  string
end
207
+
208
+ private
209
+
210
# single pattern clause:
# (predicate subject object [FILTER expression] [TRANSITIVE])
PATTERN_SCAN = /\A\((\S+)\s+(\S+)\s+(.*?)(?:\s+FILTER\b\s*(.*?)\s*)?(?:\s+(TRANSITIVE)\s*)?\)\z/.freeze

# parse query pattern graph out of a string, expand URI namespaces
#
# returns an array with one entry per parenthesized clause:
# [predicate, subject, object, filter, transitive], where _filter_ is nil
# when the clause has no FILTER and _transitive_ is true or false
#
# raises ProgrammingError when a clause can't be parsed, when it uses an
# undefined namespace prefix, or when its filter is not a valid expression
#
def parse_pattern(pattern)
  pattern.scan(/\(.*?\)(?=\s*(?:\(|\z))/).collect do |clause|
    # without this check a clause with less than three terms would fail
    # later with NoMethodError on nil
    match = PATTERN_SCAN.match(clause) or raise ProgrammingError,
      "Malformed pattern clause '#{clause}': " +
      "(predicate subject object [FILTER expression] [TRANSITIVE]) expected"
    predicate, subject, object, filter, transitive = match.captures

    [predicate, subject, object].each do |u|
      u.sub!(/\A(\S+?)::/) do
        @ns[$1] or raise ProgrammingError, "Undefined namespace prefix #{$1}"
      end
    end

    validate_expression(filter.to_s)

    [predicate, subject, object, filter, 'TRANSITIVE' == transitive]
  end
end
230
+
231
# replace RDF query parameters with their values
#
# 'NULL' becomes nil, a lone parameter becomes its value from _params_, a
# lone literal placeholder becomes the unescaped string literal; in any
# other expression parameters and literal placeholders are substituted
# inside the string
#
def expression_value(expr, params={})
  case expr
  when 'NULL' then nil
  when PARAMETER then get_parameter_value(Regexp.last_match(1), params)
  when LITERAL then @strings[Regexp.last_match(1).to_i]
  else
    # fixme: make Sequel treat it as SQL expression, not a string value
    expr.gsub(PARAMETER_AND_LITERAL_SCAN) do
      name, index = Regexp.last_match(1), Regexp.last_match(2)
      name ? get_parameter_value(name, params) : get_literal_value(index.to_i)
    end
  end
end
252
+
253
# look up the value of parameter _name_ (a String without the leading ':')
# in _params_, which is keyed by symbols
#
# raises ProgrammingError when _params_ has no such key
#
def get_parameter_value(name, params)
  symbol = name.to_sym
  return params[symbol] if params.has_key?(symbol)

  raise ProgrammingError, 'Unknown parameter :' + name
end
259
+
260
# string literal number _i_ as a single-quoted SQL string, with embedded
# single quotes doubled
#
def get_literal_value(i)
  escaped = @strings[i].gsub("'", "''")
  "'#{escaped}'"
end
263
+ end
264
+
265
+
266
+ class SquishSelect < SquishQuery
267
# parse a Squish SELECT query (see SquishQuery#new)
#
# when initialized from a String, the nodes section is split on commas into
# a list of validated expressions; a pre-parsed Hash query leaves @key unset
# and its :nodes are used as is
#
# raises ProgrammingError when the query is not a SELECT or when one of the
# selected expressions is not valid
#
def initialize(config, query)
  super(config, query)

  if @key   # initialized from a String, not a Hash
    'SELECT' == @key or raise ProgrammingError,
      'Wrong query type: SELECT expected instead of ' + @key

    @nodes = @nodes.split(/\s*,\s*/).map {|node|
      validate_expression(node)
    }
  end
end
279
+
280
# translate Squish SELECT query to SQL
#
# the order expression is added to the selected columns unless it is already
# one of them; blank nodes are replaced with their bindings, and a selected
# expression that is just a blank node is aliased to the node name
#
# raises ProgrammingError if a '?' is still present after translation
#
def to_sql
  where = @sql_mapper.where

  columns = @nodes.dup
  columns.push(@order) unless @order.empty? or @nodes.include?(@order)

  # everything after the column list
  tail = "\nFROM #{@sql_mapper.from}"
  tail << "\nWHERE #{where}" unless where.empty?
  tail << "\nGROUP BY #{@group}" unless @group.empty?
  tail << "\nORDER BY #{@order} #{@order_dir}" unless @order.empty?

  columns = columns.map do |expr|
    bound = bind_blank_nodes(expr)
    blank_node = BN.match(expr)
    blank_node ? "#{bound} AS #{blank_node[1]}" : bound
  end

  # now put it all together
  sql = "SELECT DISTINCT #{columns.join(', ')}#{bind_blank_nodes(tail)}"

  sql =~ /\?/ and raise ProgrammingError,
    "Unexpected '?' in translated query (probably, caused by unmapped blank node): #{sql.gsub(/\s+/, ' ')};"

  substitute_literals(sql)
end
304
+
305
+ private
306
+
307
# replace every blank node name found in _sql_ with the binding that the
# SQL mapper provides for it; returns a new string
#
def bind_blank_nodes(sql)
  sql.gsub(BN_SCAN) do |blank_node|
    @sql_mapper.bind(blank_node)
  end
end
312
+ end
313
+
314
+
315
+ class SquishAssert < SquishQuery
316
# parse a Squish INSERT or UPDATE statement (see SquishQuery#new)
#
# sets @insert to the list of blank nodes from the INSERT section and
# @update to a hash of blank node => value expression assignments from the
# UPDATE section
#
# raises ProgrammingError when the statement is neither INSERT nor UPDATE,
# when the INSERT section lists something other than blank nodes, or when
# an UPDATE assignment is not of the form '?node = expression'
#
def initialize(config, query)
  @config = config
  super(@config, query)

  if 'UPDATE' == @key
    @insert = ''
    @update = @nodes

  # same /mi flags as the QUERY regexp: keywords are case-insensitive and
  # sections may span several lines
  elsif 'INSERT' == @key and @nodes =~ /\A\s*(.*?)\s*(?:\bUPDATE\b\s*(.*?))?\s*\z/mi
    @insert, @update = $1, $2.to_s

  else
    # interpolation, so that a missing key can't turn this into a TypeError
    raise ProgrammingError,
      "Wrong query type: INSERT or UPDATE expected instead of #{@key}"
  end

  @insert = @insert.split(/\s*,\s*/).each {|s|
    s =~ BN or raise ProgrammingError,
      "Blank node expected in INSERT section instead of '#{s}'"
  }

  assignments = @update.split(/\s*,\s*/).collect {|assignment|
    # split on the first '=' only: the value may contain operators such as
    # '>=', '<=', '!=' or '='
    node, value = assignment.split(/\s*=\s*/, 2)

    BN =~ node or raise ProgrammingError,
      "Blank node expected on the left side of UPDATE assignment instead of '#{node}'"
    if value.nil? or value.empty?
      raise ProgrammingError,
        "Value expected on the right side of UPDATE assignment '#{assignment}'"
    end
    validate_expression(value)

    [node, value]
  }
  @update = Hash[assignments]
end
345
+
346
# execute the statement against _db_
#
# resolves a value for every node, builds one SquishAssertStatement per
# clause alias, runs those that have an action, and returns the values of
# the nodes listed in the INSERT section
#
def run(db, params={})
  values = resource_values(db, params)

  statements = alias_positions.values.
    map {|clauses| SquishAssertStatement.new(clauses, values) }.
    select {|statement| statement.action }
  SquishAssertStatement.run_ordered_statements(db, statements)

  @insert.collect {|node| values[node].value }
end
358
+
359
+ attr_reader :insert, :update
360
+
361
+ private
362
+
363
# determine a value for every node known to the SQL mapper
#
# returns a hash of node => SquishAssertValue; resource records that
# can't be found are inserted into the resource table along the way
#
# raises ProgrammingError for a blank node that occurs only in object
# position and has no value in the UPDATE section
#
def resource_values(db, params)
  values = {}

  @sql_mapper.nodes.each do |node, info|
    # becomes true when a resource record is inserted with a label other
    # than 'resource'
    created = false

    value =
      case node
      when INTERNAL   # internal resource: the node is the resource id
        $1.to_i

      when PARAMETER
        get_parameter_value($1, params)

      when LITERAL
        @strings[$1.to_i]

      when BN
        subject_position = info[:positions].find {|p| :subject == p[:role] }

        if subject_position.nil?   # blank node occurring only in object position
          expr = @update[node] or raise ProgrammingError,
            %{Blank node #{node} is undefined (drop it or set its value in UPDATE section)}
          expression_value(expr, params)

        else   # resource blank node
          found = nil
          unless @insert.include?(node)
            # look for an existing resource matching the part of the
            # pattern that is reachable from this node
            s = SquishSelect.new(
              @config, {
                :nodes => [node],
                :pattern => subgraph(node),
                :strings => @strings
              }
            )
            debug { db[s.to_sql, params].select_sql }
            found = db.fetch(s.to_sql, params).first
          end

          if found
            found.values.first
          else
            table = @sql_mapper.clauses[ subject_position[:clause] ][:map].table
            id = db[:resource].insert(:label => table)
            debug { db[:resource].insert_sql(:label => table) }
            created = true unless 'resource' == table
            id
          end
        end

      else   # external resource
        uriref = { :uriref => true, :label => node }
        existing = db[:resource].filter(uriref).first
        if existing
          existing[:id]
        else
          id = db[:resource].insert(uriref)
          debug { db[:resource].insert_sql(uriref) }
          id
        end
      end

    debug { node + ' = ' + value.inspect }
    values[node] = SquishAssertValue.new(value, created, @update.has_key?(node))
  end

  debug { values.inspect }
  values
end
428
+
429
# SQL mapper clauses grouped by their :alias, in order of first appearance
#
def alias_positions
  @sql_mapper.clauses.group_by {|clause| clause[:alias] }
end
437
+
438
+ # calculate subgraph of query pattern that is reachable from _node_
439
+ #
440
+ # fixme: make it work with optional sub-patterns
441
+ #
442
+ def subgraph(node)
443
+ subgraph = [node]
444
+ w = []
445
+ begin
446
+ stop = true
447
+ @pattern.each do |triple|
448
+ if subgraph.include? triple[1] and not w.include? triple
449
+ subgraph.push triple[2]
450
+ w.push triple
451
+ stop = false
452
+ end
453
+ end
454
+ end until stop
455
+ return w
456
+ end
457
+ end
458
+
459
+
460
# value resolved for a node of a Squish INSERT/UPDATE statement, together
# with flags that tell how the value was obtained
#
class SquishAssertValue
  attr_reader :value

  def initialize(value, new, updated)
    @value, @new, @updated = value, new, updated
  end

  # true if node was inserted into resource during value generation and a
  # corresponding record should be inserted into an internal resource table
  # later
  #
  def new?
    @new
  end

  # true if the node value is set in the UPDATE section of the Squish statement
  #
  def updated?
    @updated
  end
end
483
+
484
+
485
+ class SquishAssertStatement
486
+ include Debug
487
+
488
# prepare an insert or update for the table of the first clause
#
# _clauses_ are mapper clauses and _values_ is a hash of
# node => SquishAssertValue; the subject node of the first clause is the
# key node of the statement
#
# the action is :insert when the key node is new and the table is not
# :resource, :update when there is at least one field to set, and is left
# unset otherwise
#
def initialize(clauses, values)
  head = clauses.first
  @key_node = head[:subject][:node]
  @table = head[:map].table.to_sym

  key = values[@key_node]

  @params = {}
  @references = []
  clauses.each do |clause|
    object = clause[:object]
    node = object[:node]
    v = values[node]

    # a field is set only for a new record or an explicitly updated value
    next unless key.new? or v.updated?

    field = object[:field]
    @params[field.to_sym] = v.value

    # when subproperty value is updated, update the qualifier as well
    map = clause[:map]
    if map.subproperty_of
      @params[ RdfPropertyMap.qualifier_field(field).to_sym ] = values[map.property].value
    elsif map.superproperty?
      @params[ RdfPropertyMap.qualifier_field(field).to_sym ] = nil
    end

    # remember new nodes this statement refers to (see
    # run_ordered_statements)
    @references.push(node) if v.new?
  end

  if key.new? and @table != :resource
    # when id is inserted, insert_resource() trigger does nothing
    @action = :insert
    @params[:id] = key.value

  elsif not @params.empty?
    @action = :update
    @filter = {:id => key.value}
  end

  debug { self.inspect }
end
528
+
529
+ attr_reader :key_node, :references, :action
530
+
531
# perform the prepared action on the statement's table in _db_
#
# does nothing and returns nil when the statement has no action; otherwise
# returns whatever the dataset's insert or update call returns
#
def run(db)
  return unless @action

  dataset = db[@table]
  dataset = dataset.filter(@filter) if @filter
  debug { :insert == @action ? dataset.insert_sql(@params) : dataset.update_sql(@params) }
  dataset.send(@action, @params)
end
539
+
540
# make sure mutually referencing records are inserted in the right order
#
# repeatedly runs the first pending statement whose references have all
# been run already (statements with fewer references are tried first);
# _statements_ itself is not modified
#
# raises ProgrammingError when some statements are left and none of them
# can be run
#
def self.run_ordered_statements(db, statements)
  pending = statements.sort_by {|s| s.references.size }
  inserted = []

  loop do
    ready = pending.index {|s| (s.references - inserted).empty? }
    break if ready.nil?

    statement = pending.delete_at(ready)
    statement.run(db)
    inserted.push(statement.key_node)
  end

  pending.empty? or raise ProgrammingError,
    "Failed to resolve mutual references of inserted resources: " +
    pending.collect {|s| s.key_node + ' -- ' + s.references.join(', ') }.join('; ')
end
566
+ end
567
+
568
+ end