blacklight_advanced_search 1.0.0pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/.gitignore +5 -0
  2. data/LICENSE +14 -0
  3. data/README.rdoc +172 -0
  4. data/Rakefile +6 -0
  5. data/VERSION +1 -0
  6. data/app/controllers/advanced_controller.rb +61 -0
  7. data/app/controllers/application_controller.rb +5 -0
  8. data/app/helpers/advanced_helper.rb +40 -0
  9. data/app/views/advanced/_advanced_search_facets.html.erb +16 -0
  10. data/app/views/advanced/_advanced_search_fields.html.erb +6 -0
  11. data/app/views/advanced/_advanced_search_form.html.erb +48 -0
  12. data/app/views/advanced/_advanced_search_help.html.erb +22 -0
  13. data/app/views/advanced/index.html.erb +10 -0
  14. data/app/views/blacklight_advanced_search/_facet_limit.html.erb +25 -0
  15. data/blacklight_advanced_search.gemspec +24 -0
  16. data/config/routes.rb +3 -0
  17. data/install.rb +0 -0
  18. data/lib/blacklight_advanced_search/advanced_query_parser.rb +61 -0
  19. data/lib/blacklight_advanced_search/catalog_helper_override.rb +53 -0
  20. data/lib/blacklight_advanced_search/controller.rb +101 -0
  21. data/lib/blacklight_advanced_search/engine.rb +47 -0
  22. data/lib/blacklight_advanced_search/filter_parser.rb +13 -0
  23. data/lib/blacklight_advanced_search/parsing_nesting_parser.rb +18 -0
  24. data/lib/blacklight_advanced_search/render_constraints_override.rb +96 -0
  25. data/lib/blacklight_advanced_search/version.rb +10 -0
  26. data/lib/blacklight_advanced_search.rb +74 -0
  27. data/lib/generators/blacklight_advanced_search/assets_generator.rb +25 -0
  28. data/lib/generators/blacklight_advanced_search/blacklight_advanced_search_generator.rb +11 -0
  29. data/lib/generators/blacklight_advanced_search/templates/_search_form.html.erb +13 -0
  30. data/lib/generators/blacklight_advanced_search/templates/blacklight_advanced_search_config.rb +86 -0
  31. data/lib/generators/blacklight_advanced_search/templates/public/javascripts/blacklight_advanced_search_javascript.js +62 -0
  32. data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/advanced_results.css +41 -0
  33. data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/blacklight_advanced_search_styles.css +129 -0
  34. data/lib/parsing_nesting/Readme.rdoc +160 -0
  35. data/lib/parsing_nesting/grammar.rb +78 -0
  36. data/lib/parsing_nesting/tree.rb +457 -0
  37. data/spec/lib/filter_parser_spec.rb +28 -0
  38. data/spec/parsing_nesting/build_tree_spec.rb +238 -0
  39. data/spec/parsing_nesting/consuming_spec.rb +49 -0
  40. data/spec/parsing_nesting/to_solr_spec.rb +360 -0
  41. data/spec/rcov.opts +3 -0
  42. data/spec/spec.opts +4 -0
  43. data/spec/spec_helper.rb +9 -0
  44. data/spec/support/blacklight_mock.rb +5 -0
  45. data/uninstall.rb +1 -0
  46. metadata +164 -0
@@ -0,0 +1,457 @@
1
+ require 'parsing_nesting/grammar'
2
+ module ParsingNesting::Tree
3
+
4
+ # Get parslet output for string (parslet output is json-y objects), and
5
+ # transform to an actual abstract syntax tree made up of more semantic
6
+ # ruby objects, Node's. The top one will always be a List.
7
+ #
8
+ # Call #to_query on resulting Node in order to transform to Solr query,
9
+ # optionally passing in Solr params to be used as LocalParams in nested
10
+ # dismax queries.
11
+ #
12
+ # Our approach here works, but as we have to put in special cases
13
+ # it starts getting messy. Ideally we might want to actually transform
14
+ # the Object graph (abstract syntax tree) instead of trying to handle
15
+ # special cases in #to_query.
16
+ # For instance, transform object graph for a problematic pure-negative
17
+ # clause to the corresponding object graph without that (-a AND -b) ==>
18
+ # (NOT (a OR b)). Transform (NOT NOT a) to (a). That would probably be
19
+ # more robust. But instead we handle special cases in to_query, which
20
+ # means the special cases tend to multiply and need to be handled at
21
+ # multiple levels. But it's working for now.
22
+ #
23
+ # the #negate method was an experiment in transforming parse tree in
24
+ # place, but isn't being used. But it's left as a sign post.
25
# Entry point: runs +string+ through ParsingNesting::Grammar and hands the
# raw parslet output to to_node_tree, returning whatever that builds.
def self.parse(string)
  parslet_output = ParsingNesting::Grammar.new.parse(string)
  to_node_tree(parslet_output)
end
28
+
29
+
30
+ # theoretically Parslet's Transform could be used for this, but I think the
31
+ # manner in which I'm parsing to Parslet labelled hash isn't exactly what
32
+ # Parslet Transform is set up to work with, I couldn't figure it out. But
33
+ # easy enough to do 'manually'.
34
# Recursively converts parslet output (nested Arrays and Hashes) into
# node objects.
#
# * An Array becomes a List of its converted elements. The List wrapper is
#   kept even for a single element, because having it at the top level is
#   useful for Solr output.
# * A Hash is dispatched on the first matching key, checked in this order:
#   :list, :and_list, :or_list, :not_expression, :mandatory, :excluded,
#   :phrase, :token.  :list, :not_expression and :phrase are matched only
#   when their value is truthy; the other keys are matched on presence.
# * Anything else (or a Hash with none of those keys) yields nil.
#
# The :token payload is converted with #to_s; the :phrase payload is handed
# to Phrase unchanged.
def self.to_node_tree(tree)
  case tree
  when Array
    List.new(tree.map { |child| to_node_tree(child) })
  when Hash
    if (items = tree[:list])
      List.new(items.map { |child| to_node_tree(child) })
    elsif tree.has_key?(:and_list)
      AndList.new(tree[:and_list].map { |child| to_node_tree(child) })
    elsif tree.has_key?(:or_list)
      OrList.new(tree[:or_list].map { |child| to_node_tree(child) })
    elsif (negated = tree[:not_expression])
      NotExpression.new(to_node_tree(negated))
    elsif tree.has_key?(:mandatory)
      MandatoryClause.new(to_node_tree(tree[:mandatory]))
    elsif tree.has_key?(:excluded)
      ExcludedClause.new(to_node_tree(tree[:excluded]))
    elsif (phrase = tree[:phrase])
      Phrase.new(phrase)
    elsif tree.has_key?(:token)
      Term.new(tree[:token].to_s)
    end
  end
end
60
+
61
# Base class for nodes of the parsed query tree. Supplies the default
# #to_query plus the protected helpers subclasses use to assemble nested
# Solr queries and LocalParams.
class Node
  # Default rendering, good for anything that is embeddable in the standard
  # way: this node alone is wrapped in a single nested query.
  # Non-embeddable nodes override this.
  def to_query(solr_params)
    build_nested_query([self], solr_params)
  end

  protected # utility methods shared with subclasses

  # Builds a query from a list of nodes that respond to #to_embed, with
  # +solr_params+ embedded as LocalParams.
  #
  # options:
  #   :always_nested  (default true)     wrap the result as an escaped
  #                                      _query_:"..." nested query; pass
  #                                      false to get the bare inner query.
  #   :force_deftype  (default "dismax") defType forced into the LocalParams;
  #                                      pass nil to force nothing.
  #
  # A non-empty list made up only of ExcludedClause nodes ("-one -two") is
  # rewritten as NOT over a List of the operands, rendered with mm forced to
  # "1". On that path the options are not consulted. An empty list is NOT
  # treated as pure negative (it used to be, which produced a dangling
  # "NOT "); it simply yields the LocalParams prefix with nothing after it.
  def build_nested_query(embeddables, solr_params = {}, options = {})
    options = { :always_nested => true,
                :force_deftype => "dismax" }.merge(options)

    pure_negative = !embeddables.empty? &&
                    embeddables.all? { |n| n.kind_of?(ExcludedClause) }

    if pure_negative
      negated = NotExpression.new(List.new(embeddables.collect { |n| n.operand }))
      return negated.to_query(solr_params.merge(:mm => "1"))
    end

    inner_query = build_local_params(solr_params, options[:force_deftype]) +
                  embeddables.collect { |n| n.to_embed }.join(" ")

    if options[:always_nested]
      '_query_:"' + bs_escape(inner_query) + '"'
    else
      inner_query
    end
  end

  # Renders +hash+ as a Solr LocalParams prefix: "{!<defType> k=v k2='v w'}".
  # Returns "" when there is nothing to render.
  #
  # force_deftype - defType to insist on (default "dismax"); it replaces any
  #                 :defType / "defType" entry in +hash+. Pass nil to use
  #                 whatever defType +hash+ carries, if any.
  #
  # Values are rendered with #to_s; a value containing a space is wrapped in
  # single quotes. The caller's hash is not modified.
  def build_local_params(hash = {}, force_deftype = "dismax")
    params = hash.dup
    if force_deftype
      params[:defType] = force_deftype
      params.delete("defType") # avoid weird collision with hard to debug results
    end

    # no local params!
    return "" if params.empty?

    def_type = params.delete(:defType) || params.delete("defType")
    pairs = params.collect do |key, value|
      # to_s first, so non-String values with a space can be quoted too
      # (string concatenation with the raw value raised TypeError).
      text = value.to_s
      text.include?(" ") ? "#{key}='#{text}'" : "#{key}=#{text}"
    end

    "{!" + (def_type ? "#{def_type} " : "") + pairs.join(" ") + "}"
  end

  # Puts a single backslash in front of every occurrence of +char+ (a double
  # quote by default) in +val+ and returns the new string. The block form of
  # gsub is used so the backslash is taken literally rather than as a
  # replacement-pattern escape.
  def bs_escape(val, char = '"')
    val.gsub(char) { |found| "\\" + found }
  end
end
134
+
135
+
136
# A plain sequence of nodes. The top of a parsed tree is always a List.
class List < Node
  attr_accessor :list

  def initialize(aList)
    self.list = aList
  end

  # A bare List is never embedded inside another dismax query.
  def can_embed?
    false
  end

  # True when every element is an ExcludedClause (vacuously true for an
  # empty list).
  def simple_pure_negative?
    list.all? { |i| i.kind_of?(ExcludedClause) }
  end

  # Renders the list as a Solr query string: all embeddable elements go
  # together into one nested query (emitted first), each remaining element
  # renders itself, and the pieces are joined with " AND ".
  def to_query(solr_params = {})
    embeddable, standalone = list.partition { |i| i.respond_to?(:can_embed?) && i.can_embed? }

    queries = []
    queries << build_nested_query(embeddable, solr_params) unless embeddable.empty?
    standalone.each { |node| queries << node.to_query(solr_params) }

    queries.join(" AND ")
  end

  # Returns a Hash with :q and :defType, on the assumption that this will be
  # the ONLY :q -- used for parsing 'simple search' to Solr. Pass in the
  # params that need to be LOCAL solr params (embedded in the query as
  # "{!foo=bar}"). Params that should be sent to Solr separately are the
  # caller's responsibility; merge them into the returned hash.
  #
  # When every element is embeddable the list is expressed as one ordinary
  # dismax query (possibly prefixed with LocalParams). Otherwise it is
  # rendered with #to_query, which can combine several nested queries, and
  # labelled "lucene".
  #
  # A non-empty, purely negative list ("-one -two") also takes the lucene
  # route: build_nested_query rewrites it to NOT _query_:"...", which is
  # nested-query syntax for the lucene parser, so labelling that string
  # "dismax" would be wrong.
  #
  # This can still return a single nested _query_ where a plain query would
  # in theory do; it is not optimised for that because it shares code with
  # the multi-field case.
  def to_single_query_params(solr_local_params)
    all_embeddable = list.all? { |i| i.respond_to?(:can_embed?) && i.can_embed? }
    pure_negative  = !list.empty? && simple_pure_negative?

    if all_embeddable && !pure_negative
      # Can be expressed in a single dismax.
      {
        :q => build_nested_query(list, solr_local_params, :always_nested => false, :force_deftype => nil),
        :defType => "dismax"
      }
    else
      # Can't be expressed in a single dismax, do it the normal way.
      {
        :q => to_query(solr_local_params),
        :defType => "lucene"
      }
    end
  end

  # Returns a new List holding the negation of every element.
  def negate
    List.new(list.collect { |i| i.negate })
  end
end
204
+
205
# A list whose elements are joined by AND.
class AndList < List
  # An and-list is embeddable only if all its elements are embeddable; then
  # they are simply all embedded as Solr '+' mandatory clauses, which
  # achieves the AND. For now a pure negative list is considered not
  # embeddable, although in theory it sometimes could be if transformed
  # properly.
  def can_embed?
    !simple_pure_negative? && list.all? { |i| i.can_embed? }
  end

  # Only meaningful when all operands are embeddable.
  # Bare terms/phrases get a '+' put in front; operands whose embedded form
  # already starts with '+' or '-' are left alone, which has the desired
  # semantics. (The original author notes this works for '-' too, because
  # dismax mm seems not to consider '-' clauses.)
  #
  # The prefix check is anchored to the start of the string (\A) rather than
  # the start of any line, so an operand containing a line break followed by
  # '+' or '-' still gets its '+'.
  def to_embed
    list.collect do |operand|
      embedded = operand.to_embed
      embedded =~ /\A[+-]/ ? embedded : '+' + embedded
    end.join(" ")
  end

  # For and-lists that aren't embeddable, or are pure negative.
  def to_query(local_params)
    if simple_pure_negative?
      # Simple arguments that are pure negative fit in one single nested
      # dismax; build_nested_query takes care of negating the pure negative.
      build_nested_query(list, local_params)
    else
      clauses = list.collect { |i| i.to_query(local_params) }
      "( " + clauses.join(" AND ") + " )"
    end
  end

  # Convenient logical property here: not(a AND b) === not(a) OR not(b)
  def negate
    OrList.new(list.collect { |n| n.negate })
  end
end
257
+
258
+
259
# A list whose elements are joined by OR.
class OrList < List
  # An or-list is never embeddable as such.
  def can_embed?
    false
  end

  # Picks one of three renderings:
  #
  # * if any child is not embeddable, or is an AndList (embeddable in
  #   theory, but not in a way compatible with flattening an OR into one
  #   query), every child is rendered separately and joined with lucene OR;
  # * otherwise, if all children are excluded clauses, the pure-negative
  #   form is used;
  # * otherwise all children go into one nested dismax query with mm=1.
  def to_query(local_params)
    needs_separate_queries = list.any? { |i| !i.can_embed? || i.kind_of?(AndList) }

    return to_multi_queries(local_params) if needs_separate_queries
    return to_simple_pure_negative_query(local_params) if simple_pure_negative?

    to_one_dismax_query(local_params)
  end

  # build_nested_query isn't smart enough to refactor a simple pure negative
  # "OR", which needs an mm of 100%, so it is done by hand here. It makes
  # more sense if you remember that:
  #   -a OR -b === NOT (a AND b)
  def to_simple_pure_negative_query(local_params)
    # unwrap the operands from their ExcludedClauses
    operands = list.collect { |n| n.operand }
    # insist on mm 100%
    strict_params = local_params.merge(:mm => "100%")

    inner_query = build_local_params(strict_params) +
                  operands.collect { |n| n.to_embed }.join(" ")

    # NOT goes in front to preserve semantics
    'NOT _query_:"' + bs_escape(inner_query) + '"'
  end

  # All arguments are 'simple' (terms and phrases with +/-): put them all
  # in one single dismax with mm forced to 1.
  def to_one_dismax_query(local_params)
    relaxed_params = local_params.merge(:mm => "1")
    build_nested_query(list, relaxed_params)
  end

  # Renders every child on its own and joins the results with " OR " inside
  # parentheses. NotExpression children and simple pure negative children
  # are wrapped as "(*:* AND ...)" to work around Solr 1.4.1's lack of
  # handling of pure negative in an OR.
  def to_multi_queries(local_params)
    clauses = list.collect do |i|
      pure_negative = i.kind_of?(NotExpression) ||
                      (i.respond_to?(:simple_pure_negative?) && i.simple_pure_negative?)
      clause = i.to_query(local_params)
      pure_negative ? "(*:* AND #{clause})" : clause
    end

    "( " + clauses.join(" OR ") + " )"
  end

  # Convenient logical property here: not(a OR b) === not(a) AND not(b)
  def negate
    negated_children = list.collect { |n| n.negate }
    AndList.new(negated_children)
  end
end
332
+
333
+
334
# Logical NOT applied to a single operand node. Unlike the other node
# classes in this file it does not inherit from Node.
class NotExpression
  attr_accessor :operand

  def initialize(exp)
    self.operand = exp
  end

  # Renders "NOT " followed by the operand's own query.
  #
  # A double negative (NOT NOT x) is collapsed to just x's query, so Solr is
  # not given more work than it needs.
  #
  # The "*:* AND NOT ..." wrapping that Solr 1.4.1 needs for "x OR NOT y" is
  # not applied here; OrList#to_multi_queries adds it around NotExpression
  # children.
  #
  # NOTE(review): no parentheses are added around the operand's query. A
  # plain List operand that renders as several clauses joined with " AND "
  # would have NOT bind to the first clause only -- confirm whether the
  # grammar can produce that shape.
  def to_query(solr_params)
    if operand.kind_of?(NotExpression)
      operand.operand.to_query(solr_params)
    else
      "NOT " + operand.to_query(solr_params)
    end
  end

  # A NOT expression is never embedded in a dismax query.
  def can_embed?
    false
  end

  # Negating a NOT simply yields the operand.
  def negate
    operand
  end
end
364
+
365
# A '+' clause: wraps the operand that was marked mandatory.
class MandatoryClause < Node
  attr_accessor :operand

  def initialize(v)
    @operand = v
  end

  # Embedded form is the operand's embedded form with a '+' in front.
  def to_embed
    plus = '+'
    plus + operand.to_embed
  end

  # Right now '+' clauses only apply to terms/phrases, which can always be
  # embedded with a + in front.
  def can_embed?
    true
  end

  # Negating mandatory to excluded is decent semantics; it's not strictly
  # 'true', it's a choice.
  def negate
    ExcludedClause.new(operand)
  end
end
386
+
387
# A '-' clause: wraps the operand that was marked excluded.
class ExcludedClause < Node
  attr_accessor :operand

  def initialize(v)
    @operand = v
  end

  # Embedded form is the operand's embedded form with a '-' in front.
  def to_embed
    minus = '-'
    minus + operand.to_embed
  end

  # Right now '-' clauses only apply to terms/phrases, which can always be
  # embedded with a '-' in front.
  def can_embed?
    true
  end

  # A lone excluded clause is by definition purely negative.
  def simple_pure_negative?
    true
  end

  # Negating excluded to mandatory is a pretty decent choice.
  def negate
    MandatoryClause.new(operand)
  end
end
414
+
415
+
416
# A quoted phrase; +value+ holds the phrase text without the quotes.
class Phrase < Node
  attr_accessor :value

  def initialize(string)
    @value = string
  end

  # Phrases can always be embedded.
  def can_embed?
    true
  end

  # Embedded form is the value wrapped in double quotes.
  def to_embed
    quote = '"'
    quote + value + quote
  end

  # Negating a phrase excludes it.
  def negate
    ExcludedClause.new(self)
  end
end
435
+
436
# A single bare term; +value+ holds its text.
class Term < Node
  attr_accessor :value

  def initialize(string)
    @value = string
  end

  # Terms can always be embedded.
  def can_embed?
    true
  end

  # Embedded form is the value itself, unchanged.
  def to_embed
    value
  end

  # Negating a term excludes it.
  def negate
    ExcludedClause.new(self)
  end
end
455
+ end
456
+
457
+
@@ -0,0 +1,28 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
# Spec helper: stores +f+ in @filters on the current object, where the
# #filters helper below reads it back. Returns +f+.
def setFilters(f)
  @filters = f
end
6
+
7
# Spec helper: returns whatever is currently stored in @filters on the
# current object (nil if nothing has been stored).
def filters
  @filters
end
10
+
11
+
12
# Exercises generate_solr_fq from the mixed-in FilterParser, feeding it
# filters through the setFilters / filters helpers defined above.
describe "BlacklightAdvancedSearch::FilterParser" do
  include BlacklightAdvancedSearch::FilterParser

  describe "filter processing" do
    it "should generate an appropriate fq param" do
      setFilters(:format => ["Book", "Thesis"], :location=>["Online", "Library"])

      fq_params = generate_solr_fq

      # each field should yield one fq that ORs its two values, in either order
      format_fq = fq_params.find { |fq| fq =~ /format\:\((\"Book\"|\"Thesis\") +OR +(\"Thesis\"|\"Book\")/ }
      format_fq.should_not be_nil

      location_fq = fq_params.find { |fq| fq =~ /location\:\((\"Library\"|\"Online\") +OR +(\"Library\"|\"Online\")/ }
      location_fq.should_not be_nil
    end
  end
end