blacklight_advanced_search 1.0.0pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/.gitignore +5 -0
  2. data/LICENSE +14 -0
  3. data/README.rdoc +172 -0
  4. data/Rakefile +6 -0
  5. data/VERSION +1 -0
  6. data/app/controllers/advanced_controller.rb +61 -0
  7. data/app/controllers/application_controller.rb +5 -0
  8. data/app/helpers/advanced_helper.rb +40 -0
  9. data/app/views/advanced/_advanced_search_facets.html.erb +16 -0
  10. data/app/views/advanced/_advanced_search_fields.html.erb +6 -0
  11. data/app/views/advanced/_advanced_search_form.html.erb +48 -0
  12. data/app/views/advanced/_advanced_search_help.html.erb +22 -0
  13. data/app/views/advanced/index.html.erb +10 -0
  14. data/app/views/blacklight_advanced_search/_facet_limit.html.erb +25 -0
  15. data/blacklight_advanced_search.gemspec +24 -0
  16. data/config/routes.rb +3 -0
  17. data/install.rb +0 -0
  18. data/lib/blacklight_advanced_search/advanced_query_parser.rb +61 -0
  19. data/lib/blacklight_advanced_search/catalog_helper_override.rb +53 -0
  20. data/lib/blacklight_advanced_search/controller.rb +101 -0
  21. data/lib/blacklight_advanced_search/engine.rb +47 -0
  22. data/lib/blacklight_advanced_search/filter_parser.rb +13 -0
  23. data/lib/blacklight_advanced_search/parsing_nesting_parser.rb +18 -0
  24. data/lib/blacklight_advanced_search/render_constraints_override.rb +96 -0
  25. data/lib/blacklight_advanced_search/version.rb +10 -0
  26. data/lib/blacklight_advanced_search.rb +74 -0
  27. data/lib/generators/blacklight_advanced_search/assets_generator.rb +25 -0
  28. data/lib/generators/blacklight_advanced_search/blacklight_advanced_search_generator.rb +11 -0
  29. data/lib/generators/blacklight_advanced_search/templates/_search_form.html.erb +13 -0
  30. data/lib/generators/blacklight_advanced_search/templates/blacklight_advanced_search_config.rb +86 -0
  31. data/lib/generators/blacklight_advanced_search/templates/public/javascripts/blacklight_advanced_search_javascript.js +62 -0
  32. data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/advanced_results.css +41 -0
  33. data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/blacklight_advanced_search_styles.css +129 -0
  34. data/lib/parsing_nesting/Readme.rdoc +160 -0
  35. data/lib/parsing_nesting/grammar.rb +78 -0
  36. data/lib/parsing_nesting/tree.rb +457 -0
  37. data/spec/lib/filter_parser_spec.rb +28 -0
  38. data/spec/parsing_nesting/build_tree_spec.rb +238 -0
  39. data/spec/parsing_nesting/consuming_spec.rb +49 -0
  40. data/spec/parsing_nesting/to_solr_spec.rb +360 -0
  41. data/spec/rcov.opts +3 -0
  42. data/spec/spec.opts +4 -0
  43. data/spec/spec_helper.rb +9 -0
  44. data/spec/support/blacklight_mock.rb +5 -0
  45. data/uninstall.rb +1 -0
  46. metadata +164 -0
@@ -0,0 +1,457 @@
1
+ require 'parsing_nesting/grammar'
2
+ module ParsingNesting::Tree
3
+
4
+ # Get parslet output for string (parslet output is json-y objects), and
5
+ # transform to an actual abstract syntax tree made up of more semantic
6
+ # ruby objects, Node's. The top one will always be a List.
7
+ #
8
+ # Call #to_query on resulting Node in order to transform to Solr query,
9
+ # optionally passing in Solr params to be used as LocalParams in nested
10
+ # dismax queries.
11
+ #
12
+ # Our approach here works, but as we have to put in special cases
13
+ # it starts getting messy. Ideally we might want to actually transform
14
+ # the Object graph (abstract syntax tree) instead of trying to handle
15
+ # special cases in #to_query.
16
+ # For instance, transform object graph for a problematic pure-negative
17
+ # clause to the corresponding object graph without that (-a AND -b) ==>
18
+ # (NOT (a OR b)). Transform (NOT NOT a) to (a). That would probably be
19
+ # more robust. But instead we handle special cases in to_query, which
20
+ # means the special cases tend to multiply and need to be handled at
21
+ # multiple levels. But it's working for now.
22
+ #
23
+ # the #negate method was an experiment in transforming parse tree in
24
+ # place, but isn't being used. But it's left as a sign post.
25
def self.parse(string)
  # Run the grammar over the input first, then lift its raw output into
  # the Node object graph.
  parslet_output = ParsingNesting::Grammar.new.parse(string)
  to_node_tree(parslet_output)
end
28
+
29
+
30
# Converts the raw parser output (nested Arrays and labelled Hashes) into
# Node objects "manually". Parslet's Transform could theoretically do this,
# but the labelled hashes produced here did not seem to fit what Transform
# expects.
#
# An Array becomes a List. A Hash is dispatched on the first label that
# matches, in the order tested below. Anything else -- including a Hash
# with no recognized label -- yields nil.
def self.to_node_tree(tree)
  convert_all = lambda { |items| items.collect { |item| to_node_tree(item) } }

  # The list wrapper is kept even for a single top-level item: at one point
  # it was normalized away, but having it at the top level is useful for
  # Solr output.
  return List.new(convert_all.call(tree)) if tree.kind_of?(Array)
  return nil unless tree.kind_of?(Hash)

  if (items = tree[:list])
    List.new(convert_all.call(items))
  elsif tree.has_key?(:and_list)
    AndList.new(convert_all.call(tree[:and_list]))
  elsif tree.has_key?(:or_list)
    OrList.new(convert_all.call(tree[:or_list]))
  elsif (negated = tree[:not_expression])
    NotExpression.new(to_node_tree(negated))
  elsif tree.has_key?(:mandatory)
    MandatoryClause.new(to_node_tree(tree[:mandatory]))
  elsif tree.has_key?(:excluded)
    ExcludedClause.new(to_node_tree(tree[:excluded]))
  elsif (quoted = tree[:phrase])
    Phrase.new(quoted)
  elsif tree.has_key?(:token)
    Term.new(tree[:token].to_s)
  end
end
60
+
61
class Node
  # Default conversion to a Solr query string. Works for any node that is
  # embeddable in the standard way (i.e. implements #to_embed);
  # non-embeddable nodes have to override this and do it differently.
  def to_query(solr_params)
    build_nested_query([self], solr_params)
  end

  protected # some utility methods

  # Builds a query from a list of Nodes that have #to_embed, and some
  # solr params to embed as LocalParams.
  #
  # Options:
  #   :always_nested  (default true)     wrap the result as a nested
  #                   _query_:"..." with quotes escaped; when false the
  #                   bare query (possibly still with LocalParams) is
  #                   returned instead.
  #   :force_deftype  (default "dismax") defType written into the
  #                   LocalParams; pass nil to leave defType alone.
  #
  # Also takes care of simple "pure negative" queries like "-one -two",
  # converting them to a nested NOT query with mm=1, since dismax can't
  # handle those correctly otherwise.
  def build_nested_query(embeddables, solr_params = {}, options = {})
    options = { :always_nested => true,
                :force_deftype => "dismax" }.merge(options)

    # An empty list must not count as "pure negative": negating nothing
    # would produce a dangling "NOT " with no operand.
    pure_negative = !embeddables.empty? &&
                    embeddables.all? { |n| n.kind_of?(ExcludedClause) }

    if pure_negative
      negated = NotExpression.new(List.new(embeddables.collect { |n| n.operand }))
      return negated.to_query(solr_params.merge(:mm => "1"))
    end

    inner_query = build_local_params(solr_params, options[:force_deftype]) +
                  embeddables.collect { |n| n.to_embed }.join(" ")

    if options[:always_nested]
      '_query_:"' + bs_escape(inner_query) + '"'
    else
      inner_query
    end
  end

  # Serializes a hash as a LocalParams prefix such as "{!dismax qf='a b'}".
  # Returns "" when there is nothing to serialize.
  #
  # Pass nil as the 2nd argument if you DON'T want a defType forced into
  # the local params (used by #to_single_query_params).
  #
  # Values containing a space are wrapped in single quotes.
  # NOTE(review): single quotes inside a value are not escaped here.
  def build_local_params(hash = {}, force_deftype = "dismax")
    hash = hash.dup # never mutate the caller's params

    # we insist on dismax for our embedded queries, or whatever
    # other defType was supplied in the 2nd argument.
    hash[:defType] = force_deftype if force_deftype

    return "" if hash.empty?

    # Remove BOTH spellings of the key so that a stray duplicate can never
    # be emitted as an ordinary "defType=..." param (hard to debug).
    # The symbol key wins when both are present.
    symbol_deftype = hash.delete(:defType)
    string_deftype = hash.delete("defType")
    def_type = symbol_deftype || string_deftype

    pairs = hash.collect do |key, value|
      # Stringify once, so non-String values (symbols, numbers, ...) are
      # safe to quote as well.
      text = value.to_s
      text.include?(" ") ? "#{key}='#{text}'" : "#{key}=#{text}"
    end

    "{!" + (def_type ? "#{def_type} " : "") + pairs.join(" ") + "}"
  end

  # Puts a single backslash in front of every occurrence of char
  # (a double quote by default) in val.
  def bs_escape(val, char = '"')
    # crazy double escaping to actually get a single backslash
    # in there without triggering regexp capture reference
    val.gsub(char, '\\\\' + char)
  end
end
134
+
135
+
136
# A plain sequence of clauses with no explicit boolean operator.
class List < Node
  attr_accessor :list

  def initialize(aList)
    self.list = aList
  end

  # A bare list is never embedded inside another dismax query.
  def can_embed?
    false
  end

  # True when every member is an ExcludedClause (a "-term" style clause).
  def simple_pure_negative?
    list.all? { |member| member.kind_of?(ExcludedClause) }
  end

  # Combines all embeddable members into one nested query, lets every other
  # member produce its own query, and joins the pieces with " AND ".
  def to_query(solr_params={})
    embeddable, standalone = list.partition do |member|
      member.respond_to?(:can_embed?) && member.can_embed?
    end

    pieces = []
    pieces << build_nested_query(embeddable, solr_params) unless embeddable.empty?
    pieces.concat(standalone.collect { |node| node.to_query(solr_params) })
    pieces.join(" AND ")
  end

  # Returns a Hash with :q and :defType, assuming this will be the ONLY :q
  # (used for parsing a 'simple search' to Solr). Pass in the params that
  # need to be LOCAL solr params (embedded in the query as "{foo=bar}").
  # Params that should be sent to Solr separately are the caller's
  # responsibility; merge them into the returned hash.
  #
  # When every member is embeddable the result is a dismax query that is
  # not wrapped in _query_; otherwise the regular #to_query output is
  # returned with defType "lucene", which may include several nested
  # queries for AND/OR/NOT.
  #
  # This will still sometimes return a single nested _query_ that could
  # theoretically be an ordinary query with localparams. That works, it
  # just isn't optimized, because it reuses the code that combines
  # multiple fields needing nested queries.
  def to_single_query_params(solr_local_params)
    # Can it be expressed in a single dismax?
    single_dismax = list.all? do |member|
      member.respond_to?(:can_embed?) && member.can_embed?
    end

    if single_dismax
      {
        :q => build_nested_query(list, solr_local_params, :always_nested => false, :force_deftype => nil),
        :defType => "dismax"
      }
    else
      # Can't be expressed in a single dismax, do it the normal way
      {
        :q => self.to_query(solr_local_params),
        :defType => "lucene"
      }
    end
  end

  # Negates every member and wraps the results in a new List.
  def negate
    List.new(list.collect { |member| member.negate })
  end
end
204
+
205
# Operands joined by an explicit AND.
class AndList < List

  # An and-list is embeddable only if all its elements are embeddable;
  # then we simply embed them all as Solr '+' mandatory clauses and
  # achieve the AND that way.
  # For now, pure negative is considered not embeddable, although
  # theoretically it could sometimes be embedded if transformed properly.
  def can_embed?
    return false if simple_pure_negative?

    flags = list.collect { |operand| operand.can_embed? }
    !flags.include?(false)
  end

  # Only valid when all operands are embeddable.
  # Bare terms/phrases get a '+' put in front; operands that already start
  # with +/- are left alone, which gives the desired semantics.
  # This works even for "-", because dismax mm seems not to consider "-"
  # clauses: they are always required regardless of mm.
  def to_embed
    parts = list.collect do |operand|
      embedded = operand.to_embed
      embedded =~ /^[+-]/ ? embedded : '+' + embedded
    end
    parts.join(" ")
  end

  # For and-lists that aren't embeddable, or are pure negative.
  def to_query(local_params)
    # Simple pure-negative arguments fit in one single nested dismax;
    # build_nested_query handles negating the pure negative for us.
    return build_nested_query(list, local_params) if simple_pure_negative?

    clauses = list.collect { |operand| operand.to_query(local_params) }
    "( " + clauses.join(" AND ") + " )"
  end

  # convenient logical property here, not(a AND b) === not(a) OR not(b)
  def negate
    OrList.new(list.collect { |operand| operand.negate })
  end
end
257
+
258
+
259
# Operands joined by an explicit OR.
class OrList < List

  # never embeddable
  def can_embed?
    false
  end

  # We're never embeddable as such, but sometimes all operands can be
  # turned into one single nested dismax query with mm=1 -- when they are
  # all 'simple'. Other times we need separate nested queries separated by
  # lucene OR.
  # The single-query form applies when every child is embeddable but _not_
  # an AndList: an AndList is theoretically embeddable, but not in a way
  # compatible with flattening an OR into one query.
  def to_query(local_params)
    needs_multiple = list.any? do |operand|
      !operand.can_embed? || operand.kind_of?(AndList)
    end

    return to_multi_queries(local_params) if needs_multiple
    return to_simple_pure_negative_query(local_params) if simple_pure_negative?

    to_one_dismax_query(local_params)
  end

  # build_nested_query isn't smart enough to handle refactoring a simple
  # pure negative "OR", which needs an mm of 100%, so it is done by hand
  # here. It makes more sense if you remember that:
  #   -a OR -b === NOT (a AND b)
  def to_simple_pure_negative_query(local_params)
    # take the operands out of their ExcludedClauses
    unwrapped = list.collect { |clause| clause.operand }
    # and insist on mm 100%
    strict_params = local_params.merge(:mm => "100%")

    prefix = build_local_params(strict_params)
    body = unwrapped.collect { |node| node.to_embed }.join(" ")

    # put the NOT in front to preserve semantics.
    'NOT _query_:"' + bs_escape(prefix + body) + '"'
  end

  # All our arguments are 'simple' (terms and phrases with +/-), so put
  # them all in one single dismax with mm forced to 1.
  def to_one_dismax_query(local_params)
    relaxed_params = local_params.merge(:mm => "1")
    build_nested_query(list, relaxed_params)
  end

  # One query per operand, joined by lucene OR inside parentheses.
  def to_multi_queries(local_params)
    alternatives = list.collect do |operand|
      negative = operand.kind_of?(NotExpression) ||
                 (operand.respond_to?(:simple_pure_negative?) && operand.simple_pure_negative?)
      query = operand.to_query(local_params)

      # Pure negatives need special handling to work around Solr 1.4.1's
      # lack of handling of pure negative in an OR.
      negative ? "(*:* AND #{query})" : query
    end

    "( " + alternatives.join(" OR ") + " )"
  end

  # convenient logical property here, not(a OR b) === not(a) AND not(b)
  def negate
    AndList.new(list.collect { |operand| operand.negate })
  end
end
332
+
333
+
334
# Logical NOT applied to a single operand node.
class NotExpression
  attr_accessor :operand

  def initialize(exp)
    self.operand = exp
  end

  # Never embedded in a dismax query.
  def can_embed?
    false
  end

  # Negating a NOT simply yields what it wrapped.
  def negate
    operand
  end

  # Emits "NOT <operand query>".
  #
  # Callers elsewhere wrap this as "(*:* AND NOT y)", because Solr 1.4.1
  # seems not to be able to handle "x OR NOT y" otherwise, at least in some
  # cases, while "x OR (*:* AND NOT y)" should mean the same thing and
  # works fine.
  def to_query(solr_params)
    # Collapse a double NOT rather than making Solr do more work than it
    # needs to with a double-negative.
    return operand.operand.to_query(solr_params) if operand.kind_of?(NotExpression)

    "NOT " + operand.to_query(solr_params)
  end
end
364
+
365
# A clause marked as required with a leading '+'.
class MandatoryClause < Node
  attr_accessor :operand

  def initialize(v)
    self.operand = v
  end

  # Right now '+' clauses only apply to terms/phrases, which can be
  # embedded with a '+' in front.
  def can_embed?
    true
  end

  # The operand's embedded form with '+' prepended.
  def to_embed
    embedded = operand.to_embed
    '+' + embedded
  end

  # Negating mandatory to excluded is decent semantics; it's not strictly
  # 'true', it's a choice.
  def negate
    ExcludedClause.new(operand)
  end
end
386
+
387
# A clause marked as prohibited with a leading '-'.
class ExcludedClause < Node
  attr_accessor :operand

  def initialize(v)
    self.operand = v
  end

  # Right now '-' clauses only apply to terms/phrases, which can be
  # embedded with a '-' in front.
  def can_embed?
    true
  end

  # A lone excluded clause is by definition purely negative.
  def simple_pure_negative?
    true
  end

  # The operand's embedded form with '-' prepended.
  def to_embed
    embedded = operand.to_embed
    '-' + embedded
  end

  # Negating excluded to mandatory is a pretty decent choice.
  def negate
    MandatoryClause.new(operand)
  end
end
414
+
415
+
416
# A phrase; it is embedded wrapped in double quotes.
class Phrase < Node
  attr_accessor :value

  def initialize(string)
    self.value = string
  end

  # Phrases can always be embedded.
  def can_embed?
    true
  end

  # The phrase text surrounded by double quotes.
  def to_embed
    quote = '"'
    quote + value + quote
  end

  # Negating a bare phrase excludes it.
  def negate
    ExcludedClause.new(self)
  end
end
435
+
436
# A single bare search token.
class Term < Node
  attr_accessor :value

  def initialize(string)
    self.value = string
  end

  # Terms can always be embedded.
  def can_embed?
    true
  end

  # A term is embedded exactly as written.
  def to_embed
    value
  end

  # Negating a bare term excludes it.
  def negate
    ExcludedClause.new(self)
  end
end
455
+ end
456
+
457
+
@@ -0,0 +1,28 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
# Spec helper: remembers the given filter hash in @filters so that the
# #filters reader can hand it back. Returns the stored value.
def setFilters(f)
  instance_variable_set(:@filters, f)
end
6
+
7
# Spec helper: returns whatever #setFilters stored in @filters (nil if
# nothing has been stored yet).
# NOTE(review): presumably this is the reader the FilterParser mixin calls --
# confirm against the mixin.
def filters
  instance_variable_get(:@filters)
end
10
+
11
+
12
describe "BlacklightAdvancedSearch::FilterParser" do
  include BlacklightAdvancedSearch::FilterParser

  describe "filter processing" do
    # Expects one fq entry per filtered field, with that field's two values
    # OR'ed together inside parentheses. The patterns accept the values in
    # either order.
    it "should generate an appropriate fq param" do
      setFilters(:format => ["Book", "Thesis"], :location=>["Online", "Library"])

      generated = generate_solr_fq

      expected_patterns = [
        /format\:\((\"Book\"|\"Thesis\") +OR +(\"Thesis\"|\"Book\")/,
        /location\:\((\"Library\"|\"Online\") +OR +(\"Library\"|\"Online\")/
      ]

      expected_patterns.each do |pattern|
        generated.find { |fq| fq =~ pattern }.should_not be_nil
      end
    end
  end
end