blacklight_advanced_search 1.0.0pre1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/LICENSE +14 -0
- data/README.rdoc +172 -0
- data/Rakefile +6 -0
- data/VERSION +1 -0
- data/app/controllers/advanced_controller.rb +61 -0
- data/app/controllers/application_controller.rb +5 -0
- data/app/helpers/advanced_helper.rb +40 -0
- data/app/views/advanced/_advanced_search_facets.html.erb +16 -0
- data/app/views/advanced/_advanced_search_fields.html.erb +6 -0
- data/app/views/advanced/_advanced_search_form.html.erb +48 -0
- data/app/views/advanced/_advanced_search_help.html.erb +22 -0
- data/app/views/advanced/index.html.erb +10 -0
- data/app/views/blacklight_advanced_search/_facet_limit.html.erb +25 -0
- data/blacklight_advanced_search.gemspec +24 -0
- data/config/routes.rb +3 -0
- data/install.rb +0 -0
- data/lib/blacklight_advanced_search/advanced_query_parser.rb +61 -0
- data/lib/blacklight_advanced_search/catalog_helper_override.rb +53 -0
- data/lib/blacklight_advanced_search/controller.rb +101 -0
- data/lib/blacklight_advanced_search/engine.rb +47 -0
- data/lib/blacklight_advanced_search/filter_parser.rb +13 -0
- data/lib/blacklight_advanced_search/parsing_nesting_parser.rb +18 -0
- data/lib/blacklight_advanced_search/render_constraints_override.rb +96 -0
- data/lib/blacklight_advanced_search/version.rb +10 -0
- data/lib/blacklight_advanced_search.rb +74 -0
- data/lib/generators/blacklight_advanced_search/assets_generator.rb +25 -0
- data/lib/generators/blacklight_advanced_search/blacklight_advanced_search_generator.rb +11 -0
- data/lib/generators/blacklight_advanced_search/templates/_search_form.html.erb +13 -0
- data/lib/generators/blacklight_advanced_search/templates/blacklight_advanced_search_config.rb +86 -0
- data/lib/generators/blacklight_advanced_search/templates/public/javascripts/blacklight_advanced_search_javascript.js +62 -0
- data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/advanced_results.css +41 -0
- data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/blacklight_advanced_search_styles.css +129 -0
- data/lib/parsing_nesting/Readme.rdoc +160 -0
- data/lib/parsing_nesting/grammar.rb +78 -0
- data/lib/parsing_nesting/tree.rb +457 -0
- data/spec/lib/filter_parser_spec.rb +28 -0
- data/spec/parsing_nesting/build_tree_spec.rb +238 -0
- data/spec/parsing_nesting/consuming_spec.rb +49 -0
- data/spec/parsing_nesting/to_solr_spec.rb +360 -0
- data/spec/rcov.opts +3 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/support/blacklight_mock.rb +5 -0
- data/uninstall.rb +1 -0
- metadata +164 -0
@@ -0,0 +1,129 @@
|
|
1
|
+
|
2
|
+
/* Kind of wacky stuff to make scrolling on limit column work right. */
|
3
|
+
|
4
|
+
.input_columns {
|
5
|
+
position: relative;
|
6
|
+
}
|
7
|
+
|
8
|
+
.limit_column {
|
9
|
+
position: absolute;
|
10
|
+
top: 0;
|
11
|
+
bottom: 0;
|
12
|
+
right: 0;
|
13
|
+
width: 49.1%;
|
14
|
+
overflow-y: hidden;
|
15
|
+
}
|
16
|
+
|
17
|
+
.limit_input {
|
18
|
+
position: absolute;
|
19
|
+
top: 6em;
|
20
|
+
bottom: 0;
|
21
|
+
right: 0;
|
22
|
+
left: 0;
|
23
|
+
overflow-y: auto;
|
24
|
+
}
|
25
|
+
|
26
|
+
/* Random styles */
|
27
|
+
|
28
|
+
.advanced_search_field label {
|
29
|
+
display:block;
|
30
|
+
}
|
31
|
+
|
32
|
+
.advanced_search_field input {
|
33
|
+
margin-bottom: 0.666em;
|
34
|
+
width: 80%;
|
35
|
+
}
|
36
|
+
|
37
|
+
form.advanced label {
|
38
|
+
font-weight:normal;
|
39
|
+
}
|
40
|
+
|
41
|
+
form.advanced h2 {
|
42
|
+
font-weight: normal;
|
43
|
+
background-color: #EEEEEE;
|
44
|
+
height: 3em;
|
45
|
+
}
|
46
|
+
|
47
|
+
form.advanced .limit_column ul {
|
48
|
+
margin: 1em;
|
49
|
+
}
|
50
|
+
|
51
|
+
form.advanced .limit_column li {
|
52
|
+
list-style: none;
|
53
|
+
padding: 0.1em 0.4em;
|
54
|
+
font-size: 80%;
|
55
|
+
}
|
56
|
+
|
57
|
+
form.advanced .facet_item h3 {
|
58
|
+
cursor: pointer;
|
59
|
+
}
|
60
|
+
|
61
|
+
form.advanced .adv_facet_selections {
|
62
|
+
color:green;
|
63
|
+
font-size: 80%;
|
64
|
+
display: block;
|
65
|
+
margin-top: 0.25em;
|
66
|
+
}
|
67
|
+
|
68
|
+
form.advanced .advanced_button {
|
69
|
+
-moz-border-radius: 4px 4px 4px 4px;
|
70
|
+
-webkit-border-radius: 4px 4px 4px 4px;
|
71
|
+
border-radius: 4px 4px 4px 4px;
|
72
|
+
background-color: #F6F6F6;
|
73
|
+
border: 1px solid #CCCCCC;
|
74
|
+
color: #2E4F81;
|
75
|
+
display: inline-block;
|
76
|
+
float: right;
|
77
|
+
margin-right: 1em;
|
78
|
+
padding: 0.4em 1em;
|
79
|
+
text-decoration: none;
|
80
|
+
}
|
81
|
+
|
82
|
+
form.advanced .reset {
|
83
|
+
}
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
.advanced_help li {
|
88
|
+
margin-top: 0.5em;
|
89
|
+
margin-bottom: 0.5em;
|
90
|
+
}
|
91
|
+
|
92
|
+
form.advanced .sort_submit_buttons {
|
93
|
+
background-color: #EEEEEE;
|
94
|
+
padding: 1em;
|
95
|
+
margin-top: 1em;
|
96
|
+
overflow: hidden; /* trick into containing floats please */
|
97
|
+
}
|
98
|
+
|
99
|
+
form.advanced .constraints {
|
100
|
+
padding: 1em;
|
101
|
+
margin-top: 1em;
|
102
|
+
background-color: #E2EDFE;
|
103
|
+
border: 1px solid #C4DAFE;
|
104
|
+
}
|
105
|
+
|
106
|
+
form.advanced .constraints h4 {
|
107
|
+
margin-bottom: 0.66em;
|
108
|
+
}
|
109
|
+
|
110
|
+
form.advanced .constraints .constraint {
|
111
|
+
display:block;
|
112
|
+
padding-left:2em;
|
113
|
+
text-indent:-2em;
|
114
|
+
}
|
115
|
+
|
116
|
+
form.advanced .constraints .constraint .filterName {
|
117
|
+
font-weight: bold;
|
118
|
+
margin-right: 0.66em;
|
119
|
+
}
|
120
|
+
|
121
|
+
form.advanced .column > h2 {
|
122
|
+
padding: 0.33em;
|
123
|
+
}
|
124
|
+
form.advanced .column > div {
|
125
|
+
padding-left: 0.33em;
|
126
|
+
padding-right: 0.33em;
|
127
|
+
}
|
128
|
+
|
129
|
+
|
@@ -0,0 +1,160 @@
|
|
1
|
+
= The "Parsing Nesting" parser and Solr query transformer
|
2
|
+
|
3
|
+
== User-entered queries handled
|
4
|
+
|
5
|
+
* simple lists of terms and phrases, possibly with + or -, are translated
|
6
|
+
directly to dismax queries, respecting whatever mm is operative for the
|
7
|
+
Blacklight search field definition (either as a specified mm param in the
|
8
|
+
search field definition, or in Solr request handler default)
|
9
|
+
* one two three
|
10
|
+
* one +two -"three phrase"
|
11
|
+
|
12
|
+
* AND/OR/NOT operators can be used for boolean logic. Parentheses can
|
13
|
+
be used to be clear about grouping, or to make arbitrarily complex
|
14
|
+
nested logic. These operators always apply to only the immediately
|
15
|
+
adjacent terms, unless parens are used, and "OR" 'binds more tightly'
|
16
|
+
than 'AND'
|
17
|
+
* big OR small AND blue OR green === (big OR small) AND (blue OR green)
|
18
|
+
* one AND two OR three AND four === one AND (two OR three) AND four
|
19
|
+
* alternative, with different meaning: (one AND two) OR (three AND four)
|
20
|
+
* NOT one two three === (NOT one) two three === -one two three
|
21
|
+
* alternative, with different meaning: NOT(one two three)
|
22
|
+
|
23
|
+
* lists of terms can be combined with AND/OR/NOT in a variety of ways
|
24
|
+
* one two three OR four === one two (three OR four)
|
25
|
+
* (one two three) AND (big small medium)
|
26
|
+
* NOT(one two) three ((four OR -five) AND (blue green red))
|
27
|
+
* Note that some of these latter ones can have confusing semantics
|
28
|
+
if your dismax mm isn't 100%.
|
29
|
+
|
30
|
+
For instance (one two three) will be
|
31
|
+
a dismax query, let's say mm=1, then the result set would actually
|
32
|
+
be the equivalent of:
|
33
|
+
(one OR two OR three).
|
34
|
+
NOT(one two three) will be an actual complementary NOT, the
|
35
|
+
complementary/inverted set -- so NOT(one two three)
|
36
|
+
(if you had dismax mm=1) will essentially
|
37
|
+
have the same semantics as:
|
38
|
+
NOT(one OR two OR three)
|
39
|
+
which isn't
|
40
|
+
necessarily what the user is expecting. But if the user always uses
|
41
|
+
explicit boolean connectors, they can exert complete control over
|
42
|
+
the semantics, and not get the 'fuzziness'. Alternately, the local
|
43
|
+
implementer could use only mm=100%, in which case everything is much
|
44
|
+
less fuzzy/hard-to-predict
|
45
|
+
|
46
|
+
== Conversion to Solr
|
47
|
+
|
48
|
+
As mentioned, a straight list of terms such as (in the most complicated
|
49
|
+
case) <tt>one -two +"three four"</tt> is translated directly to a dismax
|
50
|
+
query for those entered terms. Using the qf/pf/mm/etc you have configured
|
51
|
+
for the Blacklight search_field in question. (While by default the advanced
|
52
|
+
search plugin uses exactly the same field configurations you already have
|
53
|
+
for simple search, you could also choose to pass in different ones for
|
54
|
+
advanced search, perhaps setting mm to 100% if desired for adv search)
|
55
|
+
|
56
|
+
There are a few motivations for doing things this way:
|
57
|
+
|
58
|
+
* To be consistent with simple search, so moving to advanced is less of a
|
59
|
+
conceptual break for the user. If you take a legal simple search, and
|
60
|
+
enter it in a given field in advanced search, it will work exactly the
|
61
|
+
same as it did in simple (even if mm is not 100% in simple), rather than
|
62
|
+
having entirely different semantics.
|
63
|
+
* Taking advantage of that, one might eventually want to actually use this
|
64
|
+
parser in simple search, so user can enter single-field boolean expressions
|
65
|
+
even in simple/basic search.
|
66
|
+
* In the future, we might want to provide actual fielded searches in an
|
67
|
+
'expert' mode. +title: foo AND author:bar+ or
|
68
|
+
+(title:(one two) AND author:(three four)) OR isbn:X+
|
69
|
+
For explicit fielded searching, it is convenient if you can combine
|
70
|
+
dismax searches.
|
71
|
+
|
72
|
+
Once you start putting boolean operators AND, OR, NOT in, the query will
|
73
|
+
no longer necessarily be converted to a _single_ nested dismax query; a single
|
74
|
+
user-entered string may be converted to multiple nested queries. In some
|
75
|
+
common cases, multiple clauses will still be collapsed into fewer dismax
|
76
|
+
queries than the 'naive' translation. Examples:
|
77
|
+
|
78
|
+
* one two three (blue AND green AND -purple)
|
79
|
+
_query_:"{!dismax}one two three +blue +green -purple"
|
80
|
+
* one two three (blue OR green OR purple)
|
81
|
+
_query_:"{!dismax}one two three" AND _query_:"{!dismax mm=1}blue green purple"
|
82
|
+
|
83
|
+
However, if you use complicated crazy nesting, you can get a lot of nested
|
84
|
+
queries generated:
|
85
|
+
* ((one two) AND (three OR four)) OR (blue AND NOT (green OR purple))
|
86
|
+
( ( _query_:"{!dismax }one two" AND _query_:"{!dismax mm=1}three four" ) OR ( _query_:"{!dismax }blue" AND NOT _query_:"{!dismax mm=1}green purple" ) )
|
87
|
+
|
88
|
+
== Note on pure negative queries
|
89
|
+
|
90
|
+
In Solr 1.4.1, the dismax query parser can't handle queries with only "-"
|
91
|
+
excluded terms. And while the lucene query parser can handle certain types
|
92
|
+
of pure negative queries, it can't properly handle a NOT(x) as one of the
|
93
|
+
operands of the "OR". Our query generation strategy notices these cases
|
94
|
+
and transforms them to a semantically equivalent query that can be handled by
|
95
|
+
Solr properly. At least it tries, this is the least clean part of the code.
|
96
|
+
But there are specs showing it works for some fairly complicated queries.
|
97
|
+
|
98
|
+
* -one -two =>is transformed to=> NOT _query_:"{!dismax mm=1}one two"
|
99
|
+
* $x OR NOT $y =>is transformed to=> $x OR (*:* AND NOT $y)
|
100
|
+
|
101
|
+
This works with very complicated queries when the bad pure negative part
|
102
|
+
would be just a sub-clause or sub-query. Sometimes the result is not
|
103
|
+
the most concise query possible, but it should hold to its semantics.
|
104
|
+
|
105
|
+
* -red -blue (-foo OR -bar) (big OR NOT small)
|
106
|
+
turns into ==>
|
107
|
+
NOT _query_:"{!dismax mm=1}red blue" AND NOT _query_:"{!dismax mm=100%}foo bar" AND ( _query_:"{!dismax }big" OR (*:* AND NOT _query_:"{!dismax }small") )
|
108
|
+
|
109
|
+
== Why not use e-dismax?
|
110
|
+
|
111
|
+
That would be a potentially reasonable choice. Why didn't I?
|
112
|
+
|
113
|
+
One, at the time of this writing, edismax is not available in a tagged stable
|
114
|
+
Solr release, and I write code for Blacklight that works with tagged stable
|
115
|
+
releases.
|
116
|
+
|
117
|
+
Two, edismax doesn't necessarily entirely support the semantics I want,
|
118
|
+
especially for features I would like to add in the future. I am not sure
|
119
|
+
exactly what edismax does with complicated deeply nested expressions.
|
120
|
+
For fielded searches, edismax supports actual individual solr fields, but not
|
121
|
+
the "fields" as dismax qf aggregates that we need. These things could
|
122
|
+
be added to edismax, but with my lack of Java chops and familiarity with
|
123
|
+
Solr code, it would have taken me much longer to do (and been much less
|
124
|
+
enjoyable).
|
125
|
+
|
126
|
+
I think it may be a reasonable choice to separate concerns between Solr
|
127
|
+
and the app layer like this, let Solr handle basic search expressions,
|
128
|
+
but let the app layer handle more complicated query parsing, translating
|
129
|
+
to those simple expressions.
|
130
|
+
|
131
|
+
On the other hand, there are definite downsides to this approach. Including
|
132
|
+
having to deal with idiosyncrasies of built-in query parsers ("pure
|
133
|
+
negative" behavior), depend upon other idiosyncrasies (dismax does not
|
134
|
+
apply mm to -excluded terms), etc. And not being able to share the code
|
135
|
+
at the Solr/Java level.
|
136
|
+
|
137
|
+
In the future, a different approach that might be best of all could be
|
138
|
+
using the not-yet-finished XML query parser, to do initial parsing in
|
139
|
+
ruby at the app level, but translate to specified lucene primitives using
|
140
|
+
XML query parser, instead of having to translate to lucene/dismax query
|
141
|
+
parsers.
|
142
|
+
|
143
|
+
== Future Enhancement Ideas
|
144
|
+
Just ideas.
|
145
|
+
|
146
|
+
1. Allow expert "fielded" searches. title:foo
|
147
|
+
which would correspond not to actual solr index field "title", but
|
148
|
+
to a Blacklight-configured "search field" qf/pf.
|
149
|
+
2. Insert this app-level parser even in "simple" search, so users
|
150
|
+
can use boolean operators even in a single-fielded simple search.
|
151
|
+
3. Allow a different set of qf to be used for any "phrase term", so
|
152
|
+
phrases would search only on non-stemming fields. This would be cool,
|
153
|
+
but would kind of do weird things with dismax mm effects, since it would
|
154
|
+
mean all phrases would be extracted into separate nested queries.
|
155
|
+
4. Better error handling of syntax errors in query entry. Both in the
|
156
|
+
plugin as a whole, error messages should be displayed on the input
|
157
|
+
screen, so the entry can be fixed. But also using Parslet for parsing,
|
158
|
+
we can potentially deliver better error messages guessing what they
|
159
|
+
got wrong where in their entry.
|
160
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'parslet'
|
3
|
+
|
4
|
+
# Parslet uses Object#tap, which ships with Ruby 1.8.7+ but not 1.8.6.
# It is easy enough to implement in pure Ruby, so monkey patch it in only
# when it is missing; that way we still work with 1.8.6.
unless Object.method_defined?(:tap)
  class Object
    # Yields the receiver to the given block, then returns the receiver
    # itself (the block's own result is discarded).
    def tap
      yield(self)
      self
    end
  end
end
|
15
|
+
module ParsingNesting
  # Parslet grammar for user-entered search strings: lists of tokens and
  # quoted phrases, optional +/- prefixes, the boolean operators AND / OR /
  # NOT, and parentheses for grouping.
  class Grammar < Parslet::Parser
    root :query

    # query is actually a list of expressions: zero or more expressions or
    # parenthesized units, each optionally surrounded by spaces.
    rule :query do
      (spacing? >> (expression | paren_unit) >> spacing?).repeat
    end

    # A whole query wrapped in parentheses, captured as :list.
    rule :paren_list do
      (str('(') >> query >> str(')')).as(:list)
    end

    # A single parenthesized expression (no capture of its own); falls back
    # to paren_list when the parentheses hold more than one expression.
    rule :paren_unit do
      (str('(') >> spacing? >> expression >> spacing? >> str(')')) |
        paren_list
    end

    # Note well: It was tricky to parse the thing we want where you can
    # have a flat list with boolean operators, but where 'OR' takes precedence.
    # eg "A AND B OR C AND D" or "A OR B AND C OR D". Tricky to parse at all,
    # tricky to make precedence work. Important things that seem to make it work:
    #   and_list comes BEFORE or_list in :expression.
    #   and_list's operand can be an or_list, but NOT vice versa
    # There are others, it was an iterative process with testing.
    rule :expression do
      and_list | or_list | unary_expression
    end

    # Two or more operands joined by " AND ", captured as :and_list.
    rule :and_list do
      ((or_list | unary_expression | paren_unit) >>
        (spacing >> str("AND") >> spacing >> (or_list | unary_expression | paren_unit)).repeat(1)).as(:and_list)
    end

    # Two or more operands joined by " OR ", captured as :or_list.
    rule :or_list do
      ((unary_expression | paren_unit) >>
        (spacing >> str("OR") >> spacing >> (unary_expression | paren_unit)).repeat(1)).as(:or_list)
    end

    # +term (:mandatory), -term (:excluded), NOT operand (:not_expression),
    # or a bare phrase/token.
    #
    # NOT is only treated as an operator when it is followed by spacing, a
    # parenthesized unit, or a quoted phrase. Without that boundary an
    # ordinary word that merely starts with "NOT" (eg "NOTE") would be
    # split into the operator plus the rest of the word; such words now
    # fall through to the plain token alternative instead.
    rule :unary_expression do
      (str('+') >> (phrase | token)).as(:mandatory) |
        (str('-') >> (phrase | token)).as(:excluded) |
        (str('NOT') >> ((spacing >> (unary_expression | paren_unit)) | paren_unit | phrase)).as(:not_expression) |
        (phrase | token)
    end

    # One or more characters other than space, double quote or parentheses,
    # captured as :token.
    rule :token do
      match['^ ")('].repeat(1).as(:token)
    end

    # A double-quoted run of one or more non-quote characters; only the
    # text between the quotes is captured, as :phrase.
    rule :phrase do
      match('"') >> match['^"'].repeat(1).as(:phrase) >> match('"')
    end

    # One or more space characters.
    rule :spacing do
      match[' '].repeat(1)
    end

    # Optional spacing.
    rule :spacing? do
      spacing.maybe
    end
  end
end
|