blacklight_advanced_search 1.0.0pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/.gitignore +5 -0
  2. data/LICENSE +14 -0
  3. data/README.rdoc +172 -0
  4. data/Rakefile +6 -0
  5. data/VERSION +1 -0
  6. data/app/controllers/advanced_controller.rb +61 -0
  7. data/app/controllers/application_controller.rb +5 -0
  8. data/app/helpers/advanced_helper.rb +40 -0
  9. data/app/views/advanced/_advanced_search_facets.html.erb +16 -0
  10. data/app/views/advanced/_advanced_search_fields.html.erb +6 -0
  11. data/app/views/advanced/_advanced_search_form.html.erb +48 -0
  12. data/app/views/advanced/_advanced_search_help.html.erb +22 -0
  13. data/app/views/advanced/index.html.erb +10 -0
  14. data/app/views/blacklight_advanced_search/_facet_limit.html.erb +25 -0
  15. data/blacklight_advanced_search.gemspec +24 -0
  16. data/config/routes.rb +3 -0
  17. data/install.rb +0 -0
  18. data/lib/blacklight_advanced_search/advanced_query_parser.rb +61 -0
  19. data/lib/blacklight_advanced_search/catalog_helper_override.rb +53 -0
  20. data/lib/blacklight_advanced_search/controller.rb +101 -0
  21. data/lib/blacklight_advanced_search/engine.rb +47 -0
  22. data/lib/blacklight_advanced_search/filter_parser.rb +13 -0
  23. data/lib/blacklight_advanced_search/parsing_nesting_parser.rb +18 -0
  24. data/lib/blacklight_advanced_search/render_constraints_override.rb +96 -0
  25. data/lib/blacklight_advanced_search/version.rb +10 -0
  26. data/lib/blacklight_advanced_search.rb +74 -0
  27. data/lib/generators/blacklight_advanced_search/assets_generator.rb +25 -0
  28. data/lib/generators/blacklight_advanced_search/blacklight_advanced_search_generator.rb +11 -0
  29. data/lib/generators/blacklight_advanced_search/templates/_search_form.html.erb +13 -0
  30. data/lib/generators/blacklight_advanced_search/templates/blacklight_advanced_search_config.rb +86 -0
  31. data/lib/generators/blacklight_advanced_search/templates/public/javascripts/blacklight_advanced_search_javascript.js +62 -0
  32. data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/advanced_results.css +41 -0
  33. data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/blacklight_advanced_search_styles.css +129 -0
  34. data/lib/parsing_nesting/Readme.rdoc +160 -0
  35. data/lib/parsing_nesting/grammar.rb +78 -0
  36. data/lib/parsing_nesting/tree.rb +457 -0
  37. data/spec/lib/filter_parser_spec.rb +28 -0
  38. data/spec/parsing_nesting/build_tree_spec.rb +238 -0
  39. data/spec/parsing_nesting/consuming_spec.rb +49 -0
  40. data/spec/parsing_nesting/to_solr_spec.rb +360 -0
  41. data/spec/rcov.opts +3 -0
  42. data/spec/spec.opts +4 -0
  43. data/spec/spec_helper.rb +9 -0
  44. data/spec/support/blacklight_mock.rb +5 -0
  45. data/uninstall.rb +1 -0
  46. metadata +164 -0
@@ -0,0 +1,129 @@
1
+
2
+ /* Kind of wacky stuff to make scrolling on limit column work right. */
3
+
4
+ .input_columns {
5
+ position: relative;
6
+ }
7
+
8
+ .limit_column {
9
+ position: absolute;
10
+ top: 0;
11
+ bottom: 0;
12
+ right: 0;
13
+ width: 49.1%;
14
+ overflow-y: hidden;
15
+ }
16
+
17
+ .limit_input {
18
+ position: absolute;
19
+ top: 6em;
20
+ bottom: 0;
21
+ right: 0;
22
+ left: 0;
23
+ overflow-y: auto;
24
+ }
25
+
26
+ /* Random styles */
27
+
28
+ .advanced_search_field label {
29
+ display:block;
30
+ }
31
+
32
+ .advanced_search_field input {
33
+ margin-bottom: 0.666em;
34
+ width: 80%;
35
+ }
36
+
37
+ form.advanced label {
38
+ font-weight:normal;
39
+ }
40
+
41
+ form.advanced h2 {
42
+ font-weight: normal;
43
+ background-color: #EEEEEE;
44
+ height: 3em;
45
+ }
46
+
47
+ form.advanced .limit_column ul {
48
+ margin: 1em;
49
+ }
50
+
51
+ form.advanced .limit_column li {
52
+ list-style: none;
53
+ padding: 0.1em 0.4em;
54
+ font-size: 80%;
55
+ }
56
+
57
+ form.advanced .facet_item h3 {
58
+ cursor: pointer;
59
+ }
60
+
61
+ form.advanced .adv_facet_selections {
62
+ color:green;
63
+ font-size: 80%;
64
+ display: block;
65
+ margin-top: 0.25em;
66
+ }
67
+
68
+ form.advanced .advanced_button {
69
+ -moz-border-radius: 4px 4px 4px 4px;
70
+ -webkit-border-radius: 4px 4px 4px 4px;
71
+ border-radius: 4px 4px 4px 4px;
72
+ background-color: #F6F6F6;
73
+ border: 1px solid #CCCCCC;
74
+ color: #2E4F81;
75
+ display: inline-block;
76
+ float: right;
77
+ margin-right: 1em;
78
+ padding: 0.4em 1em;
79
+ text-decoration: none;
80
+ }
81
+
82
+ form.advanced .reset {
83
+ }
84
+
85
+
86
+
87
+ .advanced_help li {
88
+ margin-top: 0.5em;
89
+ margin-bottom: 0.5em;
90
+ }
91
+
92
+ form.advanced .sort_submit_buttons {
93
+ background-color: #EEEEEE;
94
+ padding: 1em;
95
+ margin-top: 1em;
96
+ overflow: hidden; /* trick into containing floats please */
97
+ }
98
+
99
+ form.advanced .constraints {
100
+ padding: 1em;
101
+ margin-top: 1em;
102
+ background-color: #E2EDFE;
103
+ border: 1px solid #C4DAFE;
104
+ }
105
+
106
+ form.advanced .constraints h4 {
107
+ margin-bottom: 0.66em;
108
+ }
109
+
110
+ form.advanced .constraints .constraint {
111
+ display:block;
112
+ padding-left:2em;
113
+ text-indent:-2em;
114
+ }
115
+
116
+ form.advanced .constraints .constraint .filterName {
117
+ font-weight: bold;
118
+ margin-right: 0.66em;
119
+ }
120
+
121
+ form.advanced .column > h2 {
122
+ padding: 0.33em;
123
+ }
124
+ form.advanced .column > div {
125
+ padding-left: 0.33em;
126
+ padding-right: 0.33em;
127
+ }
128
+
129
+
@@ -0,0 +1,160 @@
1
+ = The "Parsing Nesting" parser and Solr query transformer
2
+
3
+ == User-entered queries handled
4
+
5
+ * simple lists of terms and phrases, possibly with + or -, are translated
6
+ directly to dismax queries, respecting whatever mm is operative for the
7
+ Blacklight search field definition (either as a specified mm param in the
8
+ search field definition, or in Solr request handler default)
9
+ * one two three
10
+ * one +two -"three phrase"
11
+
12
+ * AND/OR/NOT operators can be used for boolean logic. Parentheses can
13
+ be used to be clear about grouping, or to make arbitrarily complex
14
+ nested logic. These operators always apply to only the immediately
15
+ adjacent terms, unless parens are used, and "OR" 'binds more tightly'
16
+ than 'AND'
17
+ * big OR small AND blue OR green === (big OR small) AND (blue OR green)
18
+ * one AND two OR three AND four === one AND (two OR three) AND four
19
+ * alternative, with different meaning: (one AND two) OR (three AND four)
20
+ * NOT one two three === (NOT one) two three === -one two three
21
+ * alternative, with different meaning: NOT(one two three)
22
+
23
+ * lists of terms can be combined with AND/OR/NOT in a variety of ways
24
+ * one two three OR four === one two (three OR four)
25
+ * (one two three) AND (big small medium)
26
+ * NOT(one two) three ((four OR -five) AND (blue green red))
27
+ * Note that some of these latter ones can have confusing semantics
28
+ if your dismax mm isn't 100%.
29
+
30
+ For instance (one two three) will be
31
+ a dismax query, let's say mm=1, then the result set would actually
32
+ be the equivalent of:
33
+ (one OR two OR three).
34
+ NOT(one two three) will be an actual complementary NOT, the
35
+ complementary/inverted set -- so NOT(one two three)
36
+ (if you had dismax mm=1) will essentially
37
+ have the same semantics as:
38
+ NOT(one OR two OR three)
39
+ which isn't
40
+ necessarily what the user is expecting. But if the user always uses
41
+ explicit boolean connectors, they can exert complete control over
42
+ the semantics, and not get the 'fuzziness'. Alternately, the local
43
+ implementer could use only mm=100%, in which case everything is much
44
+ less fuzzy/hard-to-predict
45
+
46
+ == Conversion to Solr
47
+
48
+ As mentioned, a straight list of terms such as (in the most complicated)
49
+ case: one -two +"three four" >> is translated directly to a dismax
50
+ query for those entered terms. Using the qf/pf/mm/etc you have configured
51
+ for the Blacklight search_field in question. (While by default the advanced
52
+ search plugin uses exactly the same field configurations you already have
53
+ for simple search, you could also choose to pass in different ones for
54
+ advanced search, perhaps setting mm to 100% if desired for adv search)
55
+
56
+ There are a few motivations for doing things this way:
57
+
58
+ * To be consistent with simple search, so moving to advanced is less of a
59
+ conceptual break for the user. If you take a legal simple search, and
60
+ enter it in a given field in advanced search, it will work exactly the
61
+ same as it did in simple (even if mm is not 100% in simple), rather than
62
+ having entirely different semantics.
63
+ * Taking advantage of that, one might eventually want to actually use this
64
+ parser in simple search, so user can enter single-field boolean expressions
65
+ even in simple/basic search.
66
+ * In the future, we might want to provide actual fielded searches in an
67
+ 'expert' mode. +title: foo AND author:bar+ or
68
+ +(title:(one two) AND author:(three four)) OR isbn:X+
69
+ For explicit fielded searching, it is convenient if you can combine
70
+ dismax searches.
71
+
72
+ Once you start putting boolean operators AND, OR, NOT in, the query will
73
+ no longer necessarily be converted to a _single_ nested dismax query; a single
74
+ user-entered string may be converted to multiple nested queries. In some
75
+ common cases, multiple clauses will still be collapsed into fewer dismax
76
+ queries than the 'naive' translation. Examples:
77
+
78
+ * one two three (blue AND green AND -purple)
79
+ _query_:"{!dismax}one two three +blue +green -purple"
80
+ * one two three (blue OR green OR purple)
81
+ _query_:"{!dismax}one two three" AND _query_:"{!dismax mm=1}blue green purple"
82
+
83
+ However, if you use complicated crazy nesting, you can get a lot of nested
84
+ queries generated:
85
+ * ((one two) AND (three OR four)) OR (blue AND NOT (green OR purple))
86
+ ( ( _query_:"{!dismax }one two" AND _query_:"{!dismax mm=1}three four" ) OR ( _query_:"{!dismax }blue" AND NOT _query_:"{!dismax mm=1}green purple" ) )
87
+
88
+ = Note on pure negative queries
89
+
90
+ In Solr 1.4.1, the dismax query parser can't handle queries with only "-"
91
+ excluded terms. And while the lucene query parser can handle certain types
92
+ of pure negative queries, it can't properly handle a NOT(x) as one of the
93
+ operands of the "OR". Our query generation strategy notices these cases
94
+ and transforms them into a semantically equivalent query that can be handled by
95
+ Solr properly. At least it tries, this is the least clean part of the code.
96
+ But there are specs showing it works for some fairly complicated queries.
97
+
98
+ * -one -two =>is transformed to=> NOT _query_:"{!dismax mm=1}one two"
99
+ * $x OR NOT $y =>is transformed to=> $x OR (*:* AND NOT $y)
100
+
101
+ This works with very complicated queries when the bad pure negative part
102
+ would be just a sub-clause or sub-query. Sometimes the result is not
103
+ the most concise query possible, but it should hold to its semantics.
104
+
105
+ * -red -blue (-foo OR -bar) (big OR NOT small)
106
+ turns into ==>
107
+ NOT _query_:"{!dismax mm=1}red blue" AND NOT _query_:"{!dismax mm=100%}foo bar" AND ( _query_:"{!dismax }big" OR (*:* AND NOT _query_:"{!dismax }small") )
108
+
109
+ == Why not use e-dismax?
110
+
111
+ That would be a potentially reasonable choice. Why didn't I?
112
+
113
+ One, at the time of this writing, edismax is not available in a tagged stable
114
+ Solr release, and I write code for Blacklight that works with tagged stable
115
+ releases.
116
+
117
+ Two, edismax doesn't necessarily entirely support the semantics I want,
118
+ especially for features I would like to add in the future. I am not sure
119
+ exactly what edismax does with complicated deeply nested expressions.
120
+ For fielded searches, dismax supports actual individual solr fields, but not
121
+ the "fields" as dismax qf aggregates that we need. These things could
122
+ be added to dismax, but with my lack of Java chops and familiarity with
123
+ Solr code, it would have taken me much longer to do (and been much less
124
+ enjoyable).
125
+
126
+ I think it may be a reasonable choice to separate concerns between Solr
127
+ and the app layer like this, let Solr handle basic search expressions,
128
+ but let the app layer handle more complicated query parsing, translating
129
+ to those simple expressions.
130
+
131
+ On the other hand, there are definite downsides to this approach. Including
132
+ having to deal with idiosyncrasies of built-in query parsers ("pure
133
+ negative" behavior), depend upon other idiosyncrasies (dismax does not
134
+ apply mm to -excluded terms), etc. And not being able to share the code
135
+ at the Solr/Java level.
136
+
137
+ In the future, a different approach that might be best of all could be
138
+ using the not-yet-finished XML query parser, to do initial parsing in
139
+ ruby at the app level, but translate to specified lucene primitives using
140
+ XML query parser, instead of having to translate to lucene/dismax query
141
+ parsers.
142
+
143
+ == Future Enhancement Ideas
144
+ Just ideas.
145
+
146
+ 1. Allow expert "fielded" searches. title:foo
147
+ which would correspond not to actual solr index field "title", but
148
+ to a Blacklight-configured "search field" qf/pf.
149
+ 2. Insert this app-level parser even in "simple" search, so users
150
+ can use boolean operators even in a single-fielded simple search.
151
+ 3. Allow a different set of qf to be used for any "phrase term", so
152
+ phrases would search only on non-stemming fields. This would be cool,
153
+ but kind of do weird things with dismax mm effects, since it would
154
+ mean all phrases would be extracted into separate nested queries.
155
+ 4. Better error handling of syntax errors in query entry. Both in the
156
+ plugin as a whole, error messages should be displayed on the input
157
+ screen, so the entry can be fixed. But also using Parslet for parsing,
158
+ we can potentially deliver better error messages guessing what they
159
+ got wrong where in their entry.
160
+
@@ -0,0 +1,78 @@
1
+ require 'rubygems'
2
+ require 'parslet'
3
+
4
+ # Parslet uses Object#tap, which is in ruby 1.8.7+, but not 1.8.6.
5
+ # But it's easy enough to implement in pure ruby, let's monkey patch
6
+ # it in if it's not there, so we'll still work with 1.8.6
7
+ unless Object.method_defined?(:tap)
8
+ class Object
9
+ def tap
10
+ yield(self)
11
+ return self
12
+ end
13
+ end
14
+ end
15
+ module ParsingNesting
16
+ class Grammar < Parslet::Parser
17
+ root :query
18
+
19
+ # query is actually a list of expressions.
20
+ rule :query do
21
+ (spacing? >> (expression | paren_unit ) >> spacing?).repeat
22
+ end
23
+
24
+ rule :paren_list do
25
+ (str('(') >> query >> str(')')).as(:list)
26
+ end
27
+
28
+ rule :paren_unit do
29
+ (str('(') >> spacing? >> (expression ) >> spacing? >> str(')')) |
30
+ paren_list
31
+ end
32
+
33
+ # Note well: It was tricky to parse the thing we want where you can
34
+ # have a flat list with boolean operators, but where 'OR' takes precedence.
35
+ # eg "A AND B OR C AND D" or "A OR B AND C OR D". Tricky to parse at all,
36
+ # tricky to make precedence work. Important things that seem to make it work:
37
+ # and_list comes BEFORE or_list in :expression.
38
+ # and_list's operand can be an or_list, but NOT vice versa
39
+ # There are others, it was an iterative process with testing.
40
+ rule :expression do
41
+ (and_list | or_list | unary_expression )
42
+ end
43
+
44
+ rule :and_list do
45
+ ((or_list | unary_expression | paren_unit) >>
46
+ (spacing >> str("AND") >> spacing >> (or_list | unary_expression | paren_unit)).repeat(1)).as(:and_list)
47
+ end
48
+
49
+ rule :or_list do
50
+ ((unary_expression | paren_unit) >>
51
+ (spacing >> str("OR") >> spacing >> (unary_expression | paren_unit)).repeat(1)).as(:or_list)
52
+ end
53
+
54
+ rule :unary_expression do
55
+ (str('+') >> (phrase | token)).as(:mandatory) |
56
+ (str('-') >> (phrase | token)).as(:excluded) |
57
+ (str('NOT') >> spacing? >> (unary_expression | paren_unit)).as(:not_expression) |
58
+ (phrase | token)
59
+ end
60
+
61
+ rule :token do
62
+ match['^ ")('].repeat(1).as(:token)
63
+ end
64
+ rule :phrase do
65
+ match('"') >> match['^"'].repeat(1).as(:phrase) >> match('"')
66
+ end
67
+
68
+
69
+ rule :spacing do
70
+ match[' '].repeat(1)
71
+ end
72
+ rule :spacing? do
73
+ spacing.maybe
74
+ end
75
+ end
76
+
77
+
78
+ end