blacklight_advanced_search 1.0.0pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/LICENSE +14 -0
- data/README.rdoc +172 -0
- data/Rakefile +6 -0
- data/VERSION +1 -0
- data/app/controllers/advanced_controller.rb +61 -0
- data/app/controllers/application_controller.rb +5 -0
- data/app/helpers/advanced_helper.rb +40 -0
- data/app/views/advanced/_advanced_search_facets.html.erb +16 -0
- data/app/views/advanced/_advanced_search_fields.html.erb +6 -0
- data/app/views/advanced/_advanced_search_form.html.erb +48 -0
- data/app/views/advanced/_advanced_search_help.html.erb +22 -0
- data/app/views/advanced/index.html.erb +10 -0
- data/app/views/blacklight_advanced_search/_facet_limit.html.erb +25 -0
- data/blacklight_advanced_search.gemspec +24 -0
- data/config/routes.rb +3 -0
- data/install.rb +0 -0
- data/lib/blacklight_advanced_search/advanced_query_parser.rb +61 -0
- data/lib/blacklight_advanced_search/catalog_helper_override.rb +53 -0
- data/lib/blacklight_advanced_search/controller.rb +101 -0
- data/lib/blacklight_advanced_search/engine.rb +47 -0
- data/lib/blacklight_advanced_search/filter_parser.rb +13 -0
- data/lib/blacklight_advanced_search/parsing_nesting_parser.rb +18 -0
- data/lib/blacklight_advanced_search/render_constraints_override.rb +96 -0
- data/lib/blacklight_advanced_search/version.rb +10 -0
- data/lib/blacklight_advanced_search.rb +74 -0
- data/lib/generators/blacklight_advanced_search/assets_generator.rb +25 -0
- data/lib/generators/blacklight_advanced_search/blacklight_advanced_search_generator.rb +11 -0
- data/lib/generators/blacklight_advanced_search/templates/_search_form.html.erb +13 -0
- data/lib/generators/blacklight_advanced_search/templates/blacklight_advanced_search_config.rb +86 -0
- data/lib/generators/blacklight_advanced_search/templates/public/javascripts/blacklight_advanced_search_javascript.js +62 -0
- data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/advanced_results.css +41 -0
- data/lib/generators/blacklight_advanced_search/templates/public/stylesheets/blacklight_advanced_search_styles.css +129 -0
- data/lib/parsing_nesting/Readme.rdoc +160 -0
- data/lib/parsing_nesting/grammar.rb +78 -0
- data/lib/parsing_nesting/tree.rb +457 -0
- data/spec/lib/filter_parser_spec.rb +28 -0
- data/spec/parsing_nesting/build_tree_spec.rb +238 -0
- data/spec/parsing_nesting/consuming_spec.rb +49 -0
- data/spec/parsing_nesting/to_solr_spec.rb +360 -0
- data/spec/rcov.opts +3 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/support/blacklight_mock.rb +5 -0
- data/uninstall.rb +1 -0
- metadata +164 -0
@@ -0,0 +1,129 @@
|
|
1
|
+
|
2
|
+
/* Kind of wacky stuff to make scrolling on limit column work right. */
|
3
|
+
|
4
|
+
.input_columns {
|
5
|
+
position: relative;
|
6
|
+
}
|
7
|
+
|
8
|
+
.limit_column {
|
9
|
+
position: absolute;
|
10
|
+
top: 0;
|
11
|
+
bottom: 0;
|
12
|
+
right: 0;
|
13
|
+
width: 49.1%;
|
14
|
+
overflow-y: hidden;
|
15
|
+
}
|
16
|
+
|
17
|
+
.limit_input {
|
18
|
+
position: absolute;
|
19
|
+
top: 6em;
|
20
|
+
bottom: 0;
|
21
|
+
right: 0;
|
22
|
+
left: 0;
|
23
|
+
overflow-y: auto;
|
24
|
+
}
|
25
|
+
|
26
|
+
/* Random styles */
|
27
|
+
|
28
|
+
.advanced_search_field label {
|
29
|
+
display:block;
|
30
|
+
}
|
31
|
+
|
32
|
+
.advanced_search_field input {
|
33
|
+
margin-bottom: 0.666em;
|
34
|
+
width: 80%;
|
35
|
+
}
|
36
|
+
|
37
|
+
form.advanced label {
|
38
|
+
font-weight:normal;
|
39
|
+
}
|
40
|
+
|
41
|
+
form.advanced h2 {
|
42
|
+
font-weight: normal;
|
43
|
+
background-color: #EEEEEE;
|
44
|
+
height: 3em;
|
45
|
+
}
|
46
|
+
|
47
|
+
form.advanced .limit_column ul {
|
48
|
+
margin: 1em;
|
49
|
+
}
|
50
|
+
|
51
|
+
form.advanced .limit_column li {
|
52
|
+
list-style: none;
|
53
|
+
padding: 0.1em 0.4em;
|
54
|
+
font-size: 80%;
|
55
|
+
}
|
56
|
+
|
57
|
+
form.advanced .facet_item h3 {
|
58
|
+
cursor: pointer;
|
59
|
+
}
|
60
|
+
|
61
|
+
form.advanced .adv_facet_selections {
|
62
|
+
color:green;
|
63
|
+
font-size: 80%;
|
64
|
+
display: block;
|
65
|
+
margin-top: 0.25em;
|
66
|
+
}
|
67
|
+
|
68
|
+
form.advanced .advanced_button {
|
69
|
+
-moz-border-radius: 4px 4px 4px 4px;
|
70
|
+
-webkit-border-radius: 4px 4px 4px 4px;
|
71
|
+
border-radius: 4px 4px 4px 4px;
|
72
|
+
background-color: #F6F6F6;
|
73
|
+
border: 1px solid #CCCCCC;
|
74
|
+
color: #2E4F81;
|
75
|
+
display: inline-block;
|
76
|
+
float: right;
|
77
|
+
margin-right: 1em;
|
78
|
+
padding: 0.4em 1em;
|
79
|
+
text-decoration: none;
|
80
|
+
}
|
81
|
+
|
82
|
+
form.advanced .reset {
|
83
|
+
}
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
.advanced_help li {
|
88
|
+
margin-top: 0.5em;
|
89
|
+
margin-bottom: 0.5em;
|
90
|
+
}
|
91
|
+
|
92
|
+
form.advanced .sort_submit_buttons {
|
93
|
+
background-color: #EEEEEE;
|
94
|
+
padding: 1em;
|
95
|
+
margin-top: 1em;
|
96
|
+
overflow: hidden; /* trick into containing floats please */
|
97
|
+
}
|
98
|
+
|
99
|
+
form.advanced .constraints {
|
100
|
+
padding: 1em;
|
101
|
+
margin-top: 1em;
|
102
|
+
background-color: #E2EDFE;
|
103
|
+
border: 1px solid #C4DAFE;
|
104
|
+
}
|
105
|
+
|
106
|
+
form.advanced .constraints h4 {
|
107
|
+
margin-bottom: 0.66em;
|
108
|
+
}
|
109
|
+
|
110
|
+
form.advanced .constraints .constraint {
|
111
|
+
display:block;
|
112
|
+
padding-left:2em;
|
113
|
+
text-indent:-2em;
|
114
|
+
}
|
115
|
+
|
116
|
+
form.advanced .constraints .constraint .filterName {
|
117
|
+
font-weight: bold;
|
118
|
+
margin-right: 0.66em;
|
119
|
+
}
|
120
|
+
|
121
|
+
form.advanced .column > h2 {
|
122
|
+
padding: 0.33em;
|
123
|
+
}
|
124
|
+
form.advanced .column > div {
|
125
|
+
padding-left: 0.33em;
|
126
|
+
padding-right: 0.33em;
|
127
|
+
}
|
128
|
+
|
129
|
+
|
@@ -0,0 +1,160 @@
|
|
1
|
+
= The "Parsing Nesting" parser and Solr query transformer
|
2
|
+
|
3
|
+
== User-entered queries handled
|
4
|
+
|
5
|
+
* simple lists of terms and phrases, possibly with + or -, are translated
|
6
|
+
directly to dismax queries, respecting whatever mm is operative for the
|
7
|
+
Blacklight search field definition (either as a specified mm param in the
|
8
|
+
search field definition, or in Solr request handler default)
|
9
|
+
* one two three
|
10
|
+
* one +two -"three phrase"
|
11
|
+
|
12
|
+
* AND/OR/NOT operators can be used for boolean logic. Parentheses can
|
13
|
+
be used to be clear about grouping, or to make arbitrarily complex
|
14
|
+
nested logic. These operators always apply to only the immediately
|
15
|
+
adjacent terms, unless parens are used, and "OR" 'binds more tightly'
|
16
|
+
than 'AND'
|
17
|
+
* big OR small AND blue OR green === (big OR small) AND (blue OR green)
|
18
|
+
* one AND two OR three AND four === one AND (two OR three) AND four
|
19
|
+
* alternative, with different meaning: (one AND two) OR (three AND four)
|
20
|
+
* NOT one two three === (NOT one) two three === -one two three
|
21
|
+
* alternative, with different meaning: NOT(one two three)
|
22
|
+
|
23
|
+
* lists of terms can be combined with AND/OR/NOT in a variety of ways
|
24
|
+
* one two three OR four === one two (three OR four)
|
25
|
+
* (one two three) AND (big small medium)
|
26
|
+
* NOT(one two) three ((four OR -five) AND (blue green red))
|
27
|
+
* Note that some of these latter ones can have confusing semantics
|
28
|
+
if your dismax mm isn't 100%.
|
29
|
+
|
30
|
+
For instance (one two three) will be
|
31
|
+
a dismax query, let's say mm=1, then the result set would actually
|
32
|
+
be the equivalent of:
|
33
|
+
(one OR two OR three).
|
34
|
+
NOT(one two three) will be an actual complementary NOT, the
|
35
|
+
complementary/inverted set -- so NOT(one two three)
|
36
|
+
(if you had dismax mm=1) will essentially
|
37
|
+
have the same semantics as:
|
38
|
+
NOT(one OR two OR three)
|
39
|
+
which isn't
|
40
|
+
necessarily what the user is expecting. But if the user always uses
|
41
|
+
explicit boolean connectors, they can exert complete control over
|
42
|
+
the semantics, and not get the 'fuzziness'. Alternately, the local
|
43
|
+
implementer could use only mm=100%, in which case everything is much
|
44
|
+
less fuzzy/hard-to-predict
|
45
|
+
|
46
|
+
== Conversion to Solr
|
47
|
+
|
48
|
+
As mentioned, a straight list of terms such as (in the most complicated)
|
49
|
+
case: << one -two +"three four" >> is translated directly to a dismax
|
50
|
+
query for those entered terms, using the qf/pf/mm/etc. you have configured
|
51
|
+
for the Blacklight search_field in question. (While by default the advanced
|
52
|
+
search plugin uses exactly the same field configurations you already have
|
53
|
+
for simple search, you could also choose to pass in different ones for
|
54
|
+
advanced search, perhaps setting mm to 100% if desired for adv search)
|
55
|
+
|
56
|
+
There are a few motivations for doing things this way:
|
57
|
+
|
58
|
+
* To be consistent with simple search, so moving to advanced is less of a
|
59
|
+
conceptual break for the user. If you take a legal simple search, and
|
60
|
+
enter it in a given field in advanced search, it will work exactly the
|
61
|
+
same as it did in simple (even if mm is not 100% in simple), rather than
|
62
|
+
having entirely different semantics.
|
63
|
+
* Taking advantage of that, one might eventually want to actually use this
|
64
|
+
parser in simple search, so user can enter single-field boolean expressions
|
65
|
+
even in simple/basic search.
|
66
|
+
* In the future, we might want to provide actual fielded searches in an
|
67
|
+
'expert' mode. +title: foo AND author:bar+ or
|
68
|
+
+(title:(one two) AND author:(three four)) OR isbn:X+
|
69
|
+
For explicit fielded searching, it is convenient if you can combine
|
70
|
+
dismax searches.
|
71
|
+
|
72
|
+
Once you start putting boolean operators AND, OR, NOT in, the query will
|
73
|
+
no longer necessarily be converted to a _single_ nested dismax query; a single
|
74
|
+
user-entered string may be converted to multiple nested queries. In some
|
75
|
+
common cases, multiple clauses will still be collapsed into fewer dismax
|
76
|
+
queries than the 'naive' translation. Examples:
|
77
|
+
|
78
|
+
* one two three (blue AND green AND -purple)
|
79
|
+
_query_:"{!dismax}one two three +blue +green -purple"
|
80
|
+
* one two three (blue OR green OR purple)
|
81
|
+
_query_:"{!dismax}one two three" AND _query_:"{!dismax mm=1}blue green purple"
|
82
|
+
|
83
|
+
However, if you use complicated crazy nesting, you can get a lot of nested
|
84
|
+
queries generated:
|
85
|
+
* ((one two) AND (three OR four)) OR (blue AND NOT (green OR purple))
|
86
|
+
( ( _query_:"{!dismax }one two" AND _query_:"{!dismax mm=1}three four" ) OR ( _query_:"{!dismax }blue" AND NOT _query_:"{!dismax mm=1}green purple" ) )
|
87
|
+
|
88
|
+
== Note on pure negative queries
|
89
|
+
|
90
|
+
In Solr 1.4.1, the dismax query parser can't handle queries with only "-"
|
91
|
+
excluded terms. And while the lucene query parser can handle certain types
|
92
|
+
of pure negative queries, it can't properly handle a NOT(x) as one of the
|
93
|
+
operands of the "OR". Our query generation strategy notices these cases
|
94
|
+
and transforms them to a semantically equivalent query that can be handled by
|
95
|
+
Solr properly. At least it tries, this is the least clean part of the code.
|
96
|
+
But there are specs showing it works for some fairly complicated queries.
|
97
|
+
|
98
|
+
* -one -two =>is transformed to=> NOT _query_:"{!dismax mm=1}one two"
|
99
|
+
* $x OR NOT $y =>is transformed to=> $x OR (*:* AND NOT $y)
|
100
|
+
|
101
|
+
This works with very complicated queries when the bad pure negative part
|
102
|
+
would be just a sub-clause or sub-query. Sometimes the result is not
|
103
|
+
the most concise query possible, but it should hold to its semantics.
|
104
|
+
|
105
|
+
* -red -blue (-foo OR -bar) (big OR NOT small)
|
106
|
+
turns into ==>
|
107
|
+
NOT _query_:"{!dismax mm=1}red blue" AND NOT _query_:"{!dismax mm=100%}foo bar" AND ( _query_:"{!dismax }big" OR (*:* AND NOT _query_:"{!dismax }small") )
|
108
|
+
|
109
|
+
== Why not use e-dismax?
|
110
|
+
|
111
|
+
That would be a potentially reasonable choice. Why didn't I?
|
112
|
+
|
113
|
+
One, at the time of this writing, edismax is not available in a tagged stable
|
114
|
+
Solr release, and I write code for Blacklight that works with tagged stable
|
115
|
+
releases.
|
116
|
+
|
117
|
+
Two, edismax doesn't necessarily entirely support the semantics I want,
|
118
|
+
especially for features I would like to add in the future. I am not sure
|
119
|
+
exactly what edismax does with complicated deeply nested expressions.
|
120
|
+
For fielded searches, dismax supports actual individual solr fields, but not
|
121
|
+
the "fields" as dismax qf aggregates that we need. These things could
|
122
|
+
be added to dismax, but with my lack of Java chops and familiarity with
|
123
|
+
Solr code, it would have taken me much longer to do (and been much less
|
124
|
+
enjoyable).
|
125
|
+
|
126
|
+
I think it may be a reasonable choice to separate concerns between Solr
|
127
|
+
and the app layer like this, let Solr handle basic search expressions,
|
128
|
+
but let the app layer handle more complicated query parsing, translating
|
129
|
+
to those simple expressions.
|
130
|
+
|
131
|
+
On the other hand, there are definite downsides to this approach. Including
|
132
|
+
having to deal with idiosyncrasies of built-in query parsers ("pure
|
133
|
+
negative" behavior), depend upon other idiosyncrasies (dismax does not
|
134
|
+
apply mm to -excluded terms), etc. And not being able to share the code
|
135
|
+
at the Solr/Java level.
|
136
|
+
|
137
|
+
In the future, a different approach that might be best of all could be
|
138
|
+
using the not-yet-finished XML query parser, to do initial parsing in
|
139
|
+
ruby at the app level, but translate to specified lucene primitives using
|
140
|
+
XML query parser, instead of having to translate to lucene/dismax query
|
141
|
+
parsers.
|
142
|
+
|
143
|
+
== Future Enhancement Ideas
|
144
|
+
Just ideas.
|
145
|
+
|
146
|
+
1. Allow expert "fielded" searches. title:foo
|
147
|
+
which would correspond not to actual solr index field "title", but
|
148
|
+
to a Blacklight-configured "search field" qf/pf.
|
149
|
+
2. Insert this app-level parser even in "simple" search, so users
|
150
|
+
can use boolean operators even in a single-fielded simple search.
|
151
|
+
3. Allow a different set of qf to be used for any "phrase term", so
|
152
|
+
phrases would search only on non-stemming fields. This would be cool,
|
153
|
+
but kind of do weird things with dismax mm effects, since it would
|
154
|
+
mean all phrases would be extracted into separate nested queries.
|
155
|
+
4. Better error handling of syntax errors in query entry. Both in the
|
156
|
+
plugin as a whole, error messages should be displayed on the input
|
157
|
+
screen, so the entry can be fixed. But also using Parslet for parsing,
|
158
|
+
we can potentially deliver better error messages guessing what they
|
159
|
+
got wrong where in their entry.
|
160
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'parslet'
|
3
|
+
|
4
|
+
# Parslet uses Object#tap, which is in ruby 1.8.7+, but not 1.8.6.
|
5
|
+
# But it's easy enough to implement in pure ruby, let's monkey patch
|
6
|
+
# it in if it's not there, so we'll still work with 1.8.6
|
7
|
+
# Backport of Object#tap for interpreters that lack it (Ruby 1.8.6).
# The guard makes this a no-op wherever tap is already defined, so the
# built-in implementation is never overridden.
unless Object.method_defined?(:tap)
  class Object
    # Yields the receiver to the given block and then returns the receiver,
    # ignoring whatever the block itself evaluates to.
    def tap
      yield self
      self
    end
  end
end
|
15
|
+
module ParsingNesting
  # Parslet grammar for user-entered search strings.
  #
  # Recognizes bare tokens, double-quoted phrases, the +/- prefixes,
  # the uppercase operators AND / OR / NOT, and parenthesized groups.
  # The captures produced are tagged :token, :phrase, :mandatory,
  # :excluded, :not_expression, :and_list, :or_list and :list.
  #
  # Alternatives are tried in the order written, so the ordering of the
  # `|` branches below is significant and must not be rearranged.
  class Grammar < Parslet::Parser
    root :query

    # A query is a (possibly empty) list of expressions or parenthesized
    # units, each optionally surrounded by spaces.
    rule :query do
      (spacing? >> (expression | paren_unit) >> spacing?).repeat
    end

    # A whole query wrapped in parentheses, captured as :list.
    rule :paren_list do
      (str('(') >> query >> str(')')).as(:list)
    end

    # Either exactly one expression in parentheses (no extra capture is
    # added for the parentheses), or else a parenthesized list.
    rule :paren_unit do
      (str('(') >> spacing? >> expression >> spacing? >> str(')')) |
        paren_list
    end

    # Note well: It was tricky to parse the thing we want where you can
    # have a flat list with boolean operators, but where 'OR' takes precedence.
    # eg "A AND B OR C AND D" or "A OR B AND C OR D". Tricky to parse at all,
    # tricky to make precedence work. Important things that seem to make it work:
    #   and_list comes BEFORE or_list in :expression.
    #   and_list's operand can be an or_list, but NOT vice versa
    # There are others, it was an iterative process with testing.
    rule :expression do
      (and_list | or_list | unary_expression)
    end

    # Two or more operands joined by "AND" with at least one space on each
    # side of the operator. An operand may itself be an or_list.
    rule :and_list do
      ((or_list | unary_expression | paren_unit) >>
        (spacing >> str("AND") >> spacing >> (or_list | unary_expression | paren_unit)).repeat(1)).as(:and_list)
    end

    # Two or more operands joined by "OR" with at least one space on each
    # side of the operator. An operand may NOT be an and_list.
    rule :or_list do
      ((unary_expression | paren_unit) >>
        (spacing >> str("OR") >> spacing >> (unary_expression | paren_unit)).repeat(1)).as(:or_list)
    end

    # A single term, optionally modified:
    #   +term / +"phrase"  => :mandatory
    #   -term / -"phrase"  => :excluded
    #   NOT <operand>      => :not_expression
    #   term / "phrase"    => plain
    #
    # NOT is only treated as an operator when it is followed by whitespace,
    # or directly by a parenthesized group or a quoted phrase. Without that
    # requirement any token merely beginning with the letters "NOT"
    # (e.g. NOTHING, NOTES) would be split into NOT + the remainder and
    # silently invert the search; such input now falls through to :token.
    rule :unary_expression do
      (str('+') >> (phrase | token)).as(:mandatory) |
        (str('-') >> (phrase | token)).as(:excluded) |
        (str('NOT') >> (
          (spacing >> (unary_expression | paren_unit)) |
          paren_unit |
          phrase
        )).as(:not_expression) |
        (phrase | token)
    end

    # One or more characters that are not a space, double quote or parenthesis.
    rule :token do
      match['^ ")('].repeat(1).as(:token)
    end

    # A non-empty double-quoted string; only the text between the quotes
    # is captured as :phrase.
    rule :phrase do
      match('"') >> match['^"'].repeat(1).as(:phrase) >> match('"')
    end

    # One or more literal space characters.
    rule :spacing do
      match[' '].repeat(1)
    end

    # Zero or more literal space characters.
    rule :spacing? do
      spacing.maybe
    end
  end
end
|