blacklight_advanced_search 1.0.0pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/.gitignore +5 -0
  2. data/LICENSE +14 -0
  3. data/README.rdoc +172 -0
  4. data/Rakefile +6 -0
  5. data/VERSION +1 -0
  6. data/app/controllers/advanced_controller.rb +61 -0
  7. data/app/controllers/application_controller.rb +5 -0
  8. data/app/helpers/advanced_helper.rb +40 -0
  9. data/app/views/advanced/_advanced_search_facets.html.erb +16 -0
  10. data/app/views/advanced/_advanced_search_fields.html.erb +6 -0
  11. data/app/views/advanced/_advanced_search_form.html.erb +48 -0
  12. data/app/views/advanced/_advanced_search_help.html.erb +22 -0
  13. data/app/views/advanced/index.html.erb +10 -0
  14. data/app/views/blacklight_advanced_search/_facet_limit.html.erb +25 -0
  15. data/blacklight_advanced_search.gemspec +24 -0
  16. data/config/routes.rb +3 -0
  17. data/install.rb +0 -0
  18. data/lib/blacklight_advanced_search/advanced_query_parser.rb +61 -0
  19. data/lib/blacklight_advanced_search/catalog_helper_override.rb +53 -0
  20. data/lib/blacklight_advanced_search/controller.rb +101 -0
  21. data/lib/blacklight_advanced_search/engine.rb +47 -0
  22. data/lib/blacklight_advanced_search/filter_parser.rb +13 -0
  23. data/lib/blacklight_advanced_search/parsing_nesting_parser.rb +18 -0
  24. data/lib/blacklight_advanced_search/render_constraints_override.rb +96 -0
  25. data/lib/blacklight_advanced_search/version.rb +10 -0
  26. data/lib/blacklight_advanced_search.rb +74 -0
  27. data/lib/generators/blacklight_advanced_search/assets_generator.rb +25 -0
  28. data/lib/generators/blacklight_advanced_search/blacklight_advanced_search_generator.rb +11 -0
  29. data/lib/generators/blacklight_advanced_search/templates/_search_form.html.erb +13 -0
  30. data/lib/generators/blacklight_advanced_search/templates/blacklight_advanced_search_config.rb +86 -0
  31. data/lib/generators/blacklight_advanced_search/templates/public/javascripts/blacklight_advanced_search_javascript.js +62 -0
  32. data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/advanced_results.css +41 -0
  33. data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/blacklight_advanced_search_styles.css +129 -0
  34. data/lib/parsing_nesting/Readme.rdoc +160 -0
  35. data/lib/parsing_nesting/grammar.rb +78 -0
  36. data/lib/parsing_nesting/tree.rb +457 -0
  37. data/spec/lib/filter_parser_spec.rb +28 -0
  38. data/spec/parsing_nesting/build_tree_spec.rb +238 -0
  39. data/spec/parsing_nesting/consuming_spec.rb +49 -0
  40. data/spec/parsing_nesting/to_solr_spec.rb +360 -0
  41. data/spec/rcov.opts +3 -0
  42. data/spec/spec.opts +4 -0
  43. data/spec/spec_helper.rb +9 -0
  44. data/spec/support/blacklight_mock.rb +5 -0
  45. data/uninstall.rb +1 -0
  46. metadata +164 -0
@@ -0,0 +1,129 @@
1
+
2
+ /* Layout rules that make scrolling on the limit column work: .limit_column is stretched over the full height of its containing block and clips overflow, while .limit_input is the pane that scrolls. */
3
+
4
+ .input_columns {
5
+ position: relative; /* makes this element the containing block for absolutely positioned descendants (presumably .limit_column -- confirm against the form markup) */
6
+ }
7
+
8
+ .limit_column {
9
+ position: absolute;
10
+ top: 0;
11
+ bottom: 0; /* top and bottom both 0: stretch to the full height of the containing block */
12
+ right: 0;
13
+ width: 49.1%;
14
+ overflow-y: hidden; /* clip vertical overflow at the column itself */
15
+ }
16
+
17
+ .limit_input {
18
+ position: absolute;
19
+ top: 6em; /* starts 6em below the top of its containing block -- presumably to leave room for a heading; confirm */
20
+ bottom: 0;
21
+ right: 0;
22
+ left: 0;
23
+ overflow-y: auto; /* vertical scrollbar appears only when the content overflows */
24
+ }
25
+
26
+ /* Miscellaneous styles: search field labels and inputs, form headings, and the lists inside the limit column */
27
+
28
+ .advanced_search_field label {
29
+ display:block; /* render each label as a block-level box on its own line */
30
+ }
31
+
32
+ .advanced_search_field input {
33
+ margin-bottom: 0.666em;
34
+ width: 80%;
35
+ }
36
+
37
+ form.advanced label {
38
+ font-weight:normal;
39
+ }
40
+
41
+ form.advanced h2 {
42
+ font-weight: normal;
43
+ background-color: #EEEEEE;
44
+ height: 3em;
45
+ }
46
+
47
+ form.advanced .limit_column ul {
48
+ margin: 1em;
49
+ }
50
+
51
+ form.advanced .limit_column li {
52
+ list-style: none; /* no bullet markers on limit list items */
53
+ padding: 0.1em 0.4em;
54
+ font-size: 80%;
55
+ }
56
+
57
+ form.advanced .facet_item h3 {
58
+ cursor: pointer; /* pointer cursor suggests the heading is clickable; any click behavior is defined outside this stylesheet */
59
+ }
60
+
61
+ form.advanced .adv_facet_selections {
62
+ color:green;
63
+ font-size: 80%;
64
+ display: block;
65
+ margin-top: 0.25em;
66
+ }
67
+
68
+ form.advanced .advanced_button { /* rounded, right-floated button styling used inside the advanced search form */
69
+ -moz-border-radius: 4px 4px 4px 4px; /* vendor-prefixed form for older Gecko browsers */
70
+ -webkit-border-radius: 4px 4px 4px 4px; /* vendor-prefixed form for older WebKit browsers */
71
+ border-radius: 4px 4px 4px 4px; /* standard property, listed last so it wins where supported */
72
+ background-color: #F6F6F6;
73
+ border: 1px solid #CCCCCC;
74
+ color: #2E4F81;
75
+ display: inline-block; /* NOTE(review): has no visible effect while the float below applies, because a floated box computes to display: block */
76
+ float: right;
77
+ margin-right: 1em;
78
+ padding: 0.4em 1em;
79
+ text-decoration: none;
80
+ }
81
+
82
+ form.advanced .reset { /* empty rule with no declarations -- presumably a placeholder hook for the reset control; confirm or remove */
83
+ }
84
+
85
+
86
+
87
+ .advanced_help li { /* vertical spacing between help list items; the rules below style the sort/submit bar, the constraints box and the form columns */
88
+ margin-top: 0.5em;
89
+ margin-bottom: 0.5em;
90
+ }
91
+
92
+ form.advanced .sort_submit_buttons {
93
+ background-color: #EEEEEE;
94
+ padding: 1em;
95
+ margin-top: 1em;
96
+ overflow: hidden; /* makes this box grow to contain its floated children instead of collapsing (presumably the float: right .advanced_button elements -- confirm against the markup) */
97
+ }
98
+
99
+ form.advanced .constraints {
100
+ padding: 1em;
101
+ margin-top: 1em;
102
+ background-color: #E2EDFE;
103
+ border: 1px solid #C4DAFE;
104
+ }
105
+
106
+ form.advanced .constraints h4 {
107
+ margin-bottom: 0.66em;
108
+ }
109
+
110
+ form.advanced .constraints .constraint {
111
+ display:block;
112
+ padding-left:2em; /* together with the negative text-indent below this forms a hanging indent */
113
+ text-indent:-2em; /* first line is pulled back by 2em, so wrapped lines sit indented relative to it */
114
+ }
115
+
116
+ form.advanced .constraints .constraint .filterName {
117
+ font-weight: bold;
118
+ margin-right: 0.66em;
119
+ }
120
+
121
+ form.advanced .column > h2 { /* child combinator: applies only to h2 elements that are direct children of .column */
122
+ padding: 0.33em;
123
+ }
124
+ form.advanced .column > div { /* direct child divs only; horizontal padding matches the h2 padding above */
125
+ padding-left: 0.33em;
126
+ padding-right: 0.33em;
127
+ }
128
+
129
+
@@ -0,0 +1,160 @@
1
+ = The "Parsing Nesting" parser and Solr query transformer
2
+
3
+ == User-entered queries handled
4
+
5
+ * simple lists of terms and phrases, possibly with + or -, are translated
6
+ directly to dismax queries, respecting whatever mm is operative for the
7
+ Blacklight search field definition (either as a specified mm param in the
8
+ search field definition, or in Solr request handler default)
9
+ * one two three
10
+ * one +two -"three phrase"
11
+
12
+ * AND/OR/NOT operators can be used for boolean logic. Parentheses can
13
+ be used to be clear about grouping, or to make arbitrarily complex
14
+ nested logic. These operators always apply to only the immediately
15
+ adjacent terms, unless parens are used, and "OR" 'binds more tightly'
16
+ than 'AND'
17
+ * big OR small AND blue OR green === (big OR small) AND (blue OR green)
18
+ * one AND two OR three AND four === one AND (two OR three) AND four
19
+ * alternative, with different meaning: (one AND two) OR (three AND four)
20
+ * NOT one two three === (NOT one) two three === -one two three
21
+ * alternative, with different meaning: NOT(one two three)
22
+
23
+ * lists of terms can be combined with AND/OR/NOT in a variety of ways
24
+ * one two three OR four === one two (three OR four)
25
+ * (one two three) AND (big small medium)
26
+ * NOT(one two) three ((four OR -five) AND (blue green red))
27
+ * Note that some of these latter ones can have confusing semantics
28
+ if your dismax mm isn't 100%.
29
+
30
+ For instance (one two three) will be
31
+ a dismax query, let's say mm=1, then the result set would actually
32
+ be the equivalent of:
33
+ (one OR two OR three).
34
+ NOT(one two three) will be an actual complementary NOT, the
35
+ complementary/inverted set -- so NOT(one two three)
36
+ (if you had dismax mm=1) will essentially
37
+ have the same semantics as:
38
+ NOT(one OR two OR three)
39
+ which isn't
40
+ necessarily what the user is expecting. But if the user always uses
41
+ explicit boolean connectors, they can exert complete control over
42
+ the semantics, and not get the 'fuzziness'. Alternately, the local
43
+ implementer could use only mm=100%, in which case everything is much
44
+ less fuzzy/hard-to-predict
45
+
46
+ == Conversion to Solr
47
+
48
+ As mentioned, a straight list of terms such as (in the most complicated)
49
+ case: <tt>one -two +"three four"</tt> is translated directly to a dismax
50
+ query for those entered terms, using the qf/pf/mm/etc you have configured
51
+ for the Blacklight search_field in question. (While by default the advanced
52
+ search plugin uses exactly the same field configurations you already have
53
+ for simple search, you could also choose to pass in different ones for
54
+ advanced search, perhaps setting mm to 100% if desired for adv search)
55
+
56
+ There are a few motivations for doing things this way:
57
+
58
+ * To be consistent with simple search, so moving to advanced is less of a
59
+ conceptual break for the user. If you take a legal simple search, and
60
+ enter it in a given field in advanced search, it will work exactly the
61
+ same as it did in simple (even if mm is not 100% in simple), rather than
62
+ having entirely different semantics.
63
+ * Taking advantage of that, one might eventually want to actually use this
64
+ parser in simple search, so user can enter single-field boolean expressions
65
+ even in simple/basic search.
66
+ * In the future, we might want to provide actual fielded searches in an
67
+ 'expert' mode. +title: foo AND author:bar+ or
68
+ +(title:(one two) AND author:(three four)) OR isbn:X+
69
+ For explicit fielded searching, it is convenient if you can combine
70
+ dismax searches.
71
+
72
+ Once you start putting boolean operators AND, OR, NOT in, the query will
73
+ no longer necessarily be converted to a _single_ nested dismax query; a single
74
+ user-entered string may be converted to multiple nested queries. In some
75
+ common cases, multiple clauses will still be collapsed into fewer dismax
76
+ queries than the 'naive' translation. Examples:
77
+
78
+ * one two three (blue AND green AND -purple)
79
+ _query_:"{!dismax}one two three +blue +green -purple"
80
+ * one two three (blue OR green OR purple)
81
+ _query_:"{!dismax}one two three" AND _query_:"{!dismax mm=1}blue green purple"
82
+
83
+ However, if you use complicated crazy nesting, you can get a lot of nested
84
+ queries generated:
85
+ * ((one two) AND (three OR four)) OR (blue AND NOT (green OR purple))
86
+ ( ( _query_:"{!dismax }one two" AND _query_:"{!dismax mm=1}three four" ) OR ( _query_:"{!dismax }blue" AND NOT _query_:"{!dismax mm=1}green purple" ) )
87
+
88
+ == Note on pure negative queries
89
+
90
+ In Solr 1.4.1, the dismax query parser can't handle queries with only "-"
91
+ excluded terms. And while the lucene query parser can handle certain types
92
+ of pure negative queries, it can't properly handle a NOT(x) as one of the
93
+ operands of the "OR". Our query generation strategy notices these cases
94
+ and transforms them to a semantically equivalent query that can be handled by
95
+ Solr properly. At least it tries, this is the least clean part of the code.
96
+ But there are specs showing it works for some fairly complicated queries.
97
+
98
+ * -one -two =>is transformed to=> NOT _query_:"{!dismax mm=1}one two"
99
+ * $x OR NOT $y =>is transformed to=> $x OR (*:* AND NOT $y)
100
+
101
+ This works with very complicated queries when the bad pure negative part
102
+ would be just a sub-clause or sub-query. Sometimes the result is not
103
+ the most concise query possible, but it should hold to its semantics.
104
+
105
+ * -red -blue (-foo OR -bar) (big OR NOT small)
106
+ turns into ==>
107
+ NOT _query_:"{!dismax mm=1}red blue" AND NOT _query_:"{!dismax mm=100%}foo bar" AND ( _query_:"{!dismax }big" OR (*:* AND NOT _query_:"{!dismax }small") )
108
+
109
+ == Why not use e-dismax?
110
+
111
+ That would be a potentially reasonable choice. Why didn't I?
112
+
113
+ One, at the time of this writing, edismax is not available in a tagged stable
114
+ Solr release, and I write code for Blacklight that works with tagged stable
115
+ releases.
116
+
117
+ Two, edismax doesn't necessarily entirely support the semantics I want,
118
+ especially for features I would like to add in the future. I am not sure
119
+ exactly what edismax does with complicated deeply nested expressions.
120
+ For fielded searches, edismax supports actual individual solr fields, but not
121
+ the "fields" as dismax qf aggregates that we need. These things could
122
+ be added to dismax, but with my lack of Java chops and familiarity with
123
+ Solr code, it would have taken me much longer to do (and been much less
124
+ enjoyable).
125
+
126
+ I think it may be a reasonable choice to separate concerns between Solr
127
+ and the app layer like this, let Solr handle basic search expressions,
128
+ but let the app layer handle more complicated query parsing, translating
129
+ to those simple expressions.
130
+
131
+ On the other hand, there are definite downsides to this approach. Including
132
+ having to deal with idiosyncrasies of built-in query parsers ("pure
133
+ negative" behavior), depend upon other idiosyncrasies (dismax does not
134
+ apply mm to -excluded terms), etc. And not being able to share the code
135
+ at the Solr/Java level.
136
+
137
+ In the future, a different approach that might be best of all could be
138
+ using the not-yet-finished XML query parser, to do initial parsing in
139
+ ruby at the app level, but translate to specified lucene primitives using
140
+ XML query parser, instead of having to translate to lucene/dismax query
141
+ parsers.
142
+
143
+ == Future Enhancement Ideas
144
+ Just ideas.
145
+
146
+ 1. Allow expert "fielded" searches. title:foo
147
+ which would correspond not to actual solr index field "title", but
148
+ to a Blacklight-configured "search field" qf/pf.
149
+ 2. Insert this app-level parser even in "simple" search, so users
150
+ can use boolean operators even in a single-fielded simple search.
151
+ 3. Allow a different set of qf to be used for any "phrase term", so
152
+ phrases would search only on non-stemming fields. This would be cool,
153
+ but would kind of do weird things with dismax mm effects, since it would
154
+ mean all phrases would be extracted into separate nested queries.
155
+ 4. Better error handling of syntax errors in query entry. Both in the
156
+ plugin as a whole, error messages should be displayed on the input
157
+ screen, so the entry can be fixed. But also using Parslet for parsing,
158
+ we can potentially deliver better error messages guessing what they
159
+ got wrong where in their entry.
160
+
@@ -0,0 +1,78 @@
1
+ require 'rubygems'
2
+ require 'parslet'
3
+
4
+ # Compatibility backfill: Parslet uses Object#tap, which exists in ruby 1.8.7+ but not in 1.8.6.
5
+ # It is easy to implement in pure ruby, so it is monkey patched in here,
6
+ # only when it is not already defined, so this code still works on 1.8.6.
7
+ unless Object.method_defined?(:tap) # skip the patch entirely when tap already exists
8
+ class Object
9
+ def tap
10
+ yield(self) # hand the receiver to the caller's block; the block's result is discarded
11
+ return self # always return the receiver itself, so calls can be chained
12
+ end
13
+ end
14
+ end
15
+ module ParsingNesting
16
+ class Grammar < Parslet::Parser # Parslet grammar for query strings built from tokens, quoted phrases, +/- prefixes, AND/OR/NOT and parentheses
17
+ root :query # parsing starts at the :query rule
18
+
19
+ # A query is a list of zero or more expressions (or parenthesized units), each optionally surrounded by spaces.
20
+ rule :query do
21
+ (spacing? >> (expression | paren_unit ) >> spacing?).repeat
22
+ end
23
+
24
+ rule :paren_list do # a whole query wrapped in parentheses, tagged :list in the parse result
25
+ (str('(') >> query >> str(')')).as(:list)
26
+ end
27
+
28
+ rule :paren_unit do # parentheses around exactly one expression (no extra tag is added); otherwise falls back to paren_list
29
+ (str('(') >> spacing? >> (expression ) >> spacing? >> str(')')) |
30
+ paren_list
31
+ end
32
+
33
+ # Note well: it was tricky to parse what we want here, a flat list
34
+ # with boolean operators in which 'OR' takes precedence over 'AND',
35
+ # eg "A AND B OR C AND D" or "A OR B AND C OR D". Tricky to parse at all,
36
+ # tricky to make precedence work. Important things that seem to make it work:
37
+ # and_list comes BEFORE or_list in :expression (alternatives are tried in order).
38
+ # and_list's operand can be an or_list, but NOT vice versa.
39
+ # There are others; it was an iterative process with testing, so change the ordering with care.
40
+ rule :expression do
41
+ (and_list | or_list | unary_expression )
42
+ end
43
+
44
+ rule :and_list do # two or more operands joined by "AND" with spaces required on both sides; tagged :and_list
45
+ ((or_list | unary_expression | paren_unit) >>
46
+ (spacing >> str("AND") >> spacing >> (or_list | unary_expression | paren_unit)).repeat(1)).as(:and_list)
47
+ end
48
+
49
+ rule :or_list do # two or more operands joined by "OR"; an operand is never a bare and_list, only a unary_expression or paren_unit; tagged :or_list
50
+ ((unary_expression | paren_unit) >>
51
+ (spacing >> str("OR") >> spacing >> (unary_expression | paren_unit)).repeat(1)).as(:or_list)
52
+ end
53
+
54
+ rule :unary_expression do # +x => :mandatory, -x => :excluded, NOT x => :not_expression, else a bare phrase or token. NOTE(review): spacing after NOT is optional, so a token such as NOTHING appears to parse as NOT + "HING" -- confirm
55
+ (str('+') >> (phrase | token)).as(:mandatory) |
56
+ (str('-') >> (phrase | token)).as(:excluded) |
57
+ (str('NOT') >> spacing? >> (unary_expression | paren_unit)).as(:not_expression) |
58
+ (phrase | token)
59
+ end
60
+
61
+ rule :token do # one or more characters that are not a space, a double quote or a parenthesis; captured as :token
62
+ match['^ ")('].repeat(1).as(:token)
63
+ end
64
+ rule :phrase do # double-quoted text of at least one non-quote character; only the text between the quotes is captured as :phrase
65
+ match('"') >> match['^"'].repeat(1).as(:phrase) >> match('"')
66
+ end
67
+
68
+
69
+ rule :spacing do # one or more literal space characters (the character class contains only ' ')
70
+ match[' '].repeat(1)
71
+ end
72
+ rule :spacing? do # optional spacing
73
+ spacing.maybe
74
+ end
75
+ end
76
+
77
+
78
+ end