solr_query-jgp 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README +46 -0
- data/Rakefile +22 -0
- data/lib/solr_query.rb +153 -0
- metadata +84 -0
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 [Matthew Rudy Jacobs]
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
SolrQuery
|
2
|
+
=========
|
3
|
+
|
4
|
+
SolrQuery is a ruby library designed to make building nested Solr queries simple and standardized.
|
5
|
+
|
6
|
+
It does everything for you,
|
7
|
+
it deals with escaping characters,
|
8
|
+
matching ActiveRecord objects by id.
|
9
|
+
|
10
|
+
It'll take an array of strings,
|
11
|
+
an array of ActiveRecords.
|
12
|
+
|
13
|
+
Give it a Hash or a Range, and it'll sort out lucene range queries for you!
|
14
|
+
|
15
|
+
Example
|
16
|
+
=======
|
17
|
+
|
18
|
+
build a query for solr;
|
19
|
+
|
20
|
+
SolrQuery.build(:keyword => "Feather duster")
|
21
|
+
#=> "text:\"feather duster\"~1000"
|
22
|
+
|
23
|
+
SolrQuery.build(:keyword => "clean", :organisation => [organisation1, organisation2])
|
24
|
+
#=> "clean AND organisation:(275 OR 6534)"
|
25
|
+
|
26
|
+
SolrQuery.build(:colour => ["red", "pink"], :item_type => ["Toy", "Train"])
|
27
|
+
#=> "colour:(red OR pink) AND item_type:(Toy OR Train)"
|
28
|
+
|
29
|
+
or you can specify a different magical key for keyword;
|
30
|
+
|
31
|
+
SolrQuery.build({:keyword => "old one", :new_keyword => "new one"}, {:keyword_key => :new_keyword})
|
32
|
+
#=> "text:\"new one\"~1000 AND keyword:(old one)"
|
33
|
+
|
34
|
+
if you need to do range queries;
|
35
|
+
|
36
|
+
SolrQuery.build(:salary => {:min => "010000", :max => "050000"})
|
37
|
+
#=> "salary:([010000 TO 050000])"
|
38
|
+
|
39
|
+
SolrQuery.build(:salary => "010000".."050000")
|
40
|
+
#=> "salary:([010000 TO 050000])"
|
41
|
+
|
42
|
+
SolrQuery.build(:surname => {:min => "jacobs"})
|
43
|
+
#=> "surname:([jacobs TO *])"
|
44
|
+
|
45
|
+
|
46
|
+
Copyright (c) 2008 [Matthew Rudy Jacobs], released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'spec'
|
3
|
+
require 'spec/rake/spectask'
|
4
|
+
require 'rake/rdoctask'
|
5
|
+
|
6
|
+
# Running `rake` with no arguments runs the spec suite.
desc 'Default: run the specs.'
task :default => :spec

# RSpec 1.x rake task; picks up every *_spec.rb file under spec/.
desc 'Run specs for the solr_query plugin'
Spec::Rake::SpecTask.new(:spec) do |t|
  t.spec_opts = ['--options', "\"spec/spec.opts\""]
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# Builds RDoc output into ./rdoc from the README and everything under lib/.
desc 'Generate documentation for the solr_query plugin.'
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'SolrQuery'
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.rdoc_files.include('README')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/lib/solr_query.rb
ADDED
@@ -0,0 +1,153 @@
|
|
1
|
+
unless nil.respond_to?(:blank?)
|
2
|
+
require File.join(File.dirname(__FILE__), "blank")
|
3
|
+
end
|
4
|
+
|
5
|
+
# Builds Solr/Lucene query strings from a Hash of conditions, escaping
# special characters along the way.
module SolrQuery
  class << self
    # Build a query string for Solr.
    #
    # conditions - Hash of field => value. Supported values:
    #              * String / anything responding to to_s: escaped and used as-is
    #              * Array: elements are OR-ed together (an empty Array becomes "NIL")
    #              * Hash with :min and/or :max, or a Range: a Lucene range query
    #              * ActiveRecord::Base instance (when ActiveRecord is loaded): its id
    #              * nil: the field is skipped
    #              The entry stored under the keyword key is "magical": it is
    #              downcased and emitted without a field prefix.
    # opts       - Hash of options:
    #              :keyword_key       - which condition is the keyword (default :keyword)
    #              :keyword_boost     - field name in which each keyword term is additionally
    #                                   searched to boost relevance (default nil)
    #              :keyword_proximity - slop used for multi-word keywords (default 1000), see
    #                                   http://wiki.apache.org/solr/SolrRelevancyCookbook
    #
    # Neither the hashes nor the values passed in are modified.
    #
    #   SolrQuery.build(:keyword => "clean")
    #   # => "clean"
    #
    #   SolrQuery.build(:keyword => "Feather duster")
    #   # => "text:\"feather duster\"~1000"
    #
    #   SolrQuery.build(:keyword => "clean", :organisation => [organisation1, organisation2])
    #   # => "clean AND organisation:(275 OR 6534)"
    #
    #   SolrQuery.build(:colour => ["red", "pink"], :item_type => ["Toy", "Train"])
    #   # => "colour:(red OR pink) AND item_type:(Toy OR Train)"
    #
    # or you can specify a different magical key for keyword;
    #
    #   SolrQuery.build({:keyword => "old one", :new_keyword => "new one"}, {:keyword_key => :new_keyword})
    #   # => "text:\"new one\"~1000 AND keyword:(old one)"
    #
    # if you need to do range queries;
    #
    #   SolrQuery.build(:salary => {:min => "010000", :max => "050000"})
    #   # => "salary:([010000 TO 050000])"
    #
    #   SolrQuery.build(:salary => "010000".."050000")
    #   # => "salary:([010000 TO 050000])"
    #
    #   SolrQuery.build(:surname => {:min => "jacobs"})
    #   # => "surname:([jacobs TO *])"
    #
    # Returns the query String ("" when there is nothing to search for).
    def build(conditions = {}, opts = {})
      conditions  = conditions.dup # shallow copy: deleting the keyword must not touch the caller's hash
      keyword_key = opts[:keyword_key] || :keyword
      proximity   = opts[:keyword_proximity] || 1000

      query_parts = keyword_query_parts(conditions.delete(keyword_key), opts[:keyword_boost], proximity)

      conditions.each do |field, value|
        query_parts << "#{field}:(#{solr_value(value)})" unless value.nil?
      end

      query_parts.join(" AND ")
    end

    protected

    # Turn the raw keyword condition into zero, one or two query parts.
    def keyword_query_parts(raw_keyword, boost_field, proximity)
      keyword = solr_value(raw_keyword, true, false)
      return [] if keyword.empty?

      # backwards compatibility - don't mess with keywords that already contain boolean
      # operators (which effectively means don't mess with keywords provided as an array)
      return [keyword] if keyword.include?(' OR ') || keyword.include?(' AND ')

      parts = []
      if keyword.include?(' ')
        # Find multiple keywords near each other, but also allow for a phrase that ends
        # with "in <some location>": words before the " in " must be near each other,
        # words after it are location(s) matched as their own proximity phrase.
        phrases = keyword.split(' in ')
        # if we have both general and location keywords, each set should be nearer each other
        slop = proximity.to_i / phrases.size
        parts << "text:\"#{phrases.shift}\"~#{slop}"
        parts << "text:\"#{phrases.join(' ')}\"~#{slop}" unless phrases.empty?
      else
        parts << keyword
      end

      if boost_field
        # Index time boosting not working, so boost the score for matches in the boost
        # field by explicitly looking for each keyword term in that field.
        boosted = keyword.split(/\s+/).map { |term| "#{boost_field}:#{term}" }.join(' AND ')
        parts[0] = "(#{parts[0]} OR (#{boosted}))"
      end
      parts
    end

    # Convert a single condition value into its escaped Solr representation.
    #
    # object   - the value (see #build for the supported kinds)
    # downcase - lowercase the value (boolean operators are kept uppercase)
    # clean    - strip special characters instead of backslash-escaping them
    #
    # Always returns a new String; +object+ is never modified.
    def solr_value(object, downcase=false, clean=false)
      if object.is_a?(Array) # `case ... when Array` will break for has_manys, hence is_a?
        if object.empty?
          string = "NIL" # an empty array should be equivalent to "don't match anything"
        else
          # Elements come back from the recursive call already escaped, so the joined
          # string must not be escaped a second time (that would double the backslashes
          # and escape the brackets of nested ranges). It is not downcased either, to
          # keep the ORs uppercase.
          joined = object.map { |element| solr_value(element, downcase, clean) }.
                     reject { |element| element.empty? }.
                     join(" OR ")
          return clean ? clean_solr_string(joined) : joined
        end
      elsif object.is_a?(Hash) || object.is_a?(Range)
        return solr_range(object) # avoid escaping the *
      elsif defined?(ActiveRecord) && object.is_a?(ActiveRecord::Base)
        string = object.id.to_s
      elsif object.is_a?(String)
        return solr_boolean_expression(object, clean) if downcase && object =~ /\s(OR|AND)\s/
        string = object
      else
        string = object.to_s
      end

      # Non-destructive on purpose: +string+ may be the caller's own (possibly frozen) object.
      string = string.downcase if downcase
      clean ? clean_solr_string(string) : escape_solr_string(string)
    end

    # Downcase and escape a keyword that contains OR / AND operators while keeping
    # the operators themselves uppercase. The result is wrapped in parentheses.
    def solr_boolean_expression(expression, clean)
      # Hide the operators behind placeholders so they survive downcasing/escaping.
      string = solr_value(expression.gsub(/\s(OR|AND)\s/, '__\1__'), true, clean)
      string = string.gsub('__or__', ' OR ').gsub('__and__', ' AND ')
      if !clean && string.include?('(') && string.include?(')') && string.count('(') == string.count(')')
        # equal number of opening and closing brackets, un-escape them
        # (yeah, it's not perfect, but it'll do)
        string = string.gsub(/\\+(\(|\))/, '\1')
      end
      "(#{string})"
    end

    # Build a Lucene range expression "[min TO max]" from a Hash (:min / :max keys)
    # or a Range. A missing bound (including the open side of an endless or
    # beginless Range) becomes "*".
    def solr_range(object)
      if object.is_a?(Hash)
        min = object[:min]
        max = object[:max]
      else
        # begin/end rather than first/last: identical for bounded ranges, but they
        # do not raise on endless/beginless ones.
        min = object.begin
        max = object.end
      end
      min = min ? solr_value(min) : "*"
      max = max ? solr_value(max) : "*"

      "[#{min} TO #{max}]"
    end

    # Remove every Lucene special character and normalise whitespace.
    def clean_solr_string(str)
      str.gsub(RE_ESCAPE_LUCENE,'').gsub(/\s+/,' ').strip
    end

    # Backslash-escape Lucene special characters, normalise whitespace and
    # neutralise a dangling boolean operator at the end of the string.
    def escape_solr_string(str)
      escaped = str.gsub(RE_ESCAPE_LUCENE) { |m| "\\#{m}" }.gsub(/\s+/,' ').strip
      # Strip first, so an operator followed by trailing whitespace is caught as well.
      escaped.gsub(ENDING_KEYWORDS) { |w| w.downcase }
    end
  end

  # The Lucene documentation declares special characters to be:
  #   + - && || ! ( ) { } [ ] ^ " ~ * ? : \
  # and I've added a semi-colon, because I find them offensive ;-)
  # note: this nice code comes from Jeremy Voorhis's Lucene query builder at http://github.com/jvoorhis/lucene_query
  RE_ESCAPE_LUCENE = /
    ( [-+!\(\)\{\}\[\]^"~*?:;\\] # A special character
    | &&                         # Boolean &&
    | \|\|                       # Boolean ||
    )
  /x

  # A boolean operator standing as a whole word at the very end of a value; it is
  # downcased so Lucene reads it as a plain term. The word boundary keeps values
  # such as "BRAND" or "HONOR" intact.
  ENDING_KEYWORDS = /\b(?:AND|OR|NOT)\z/

end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: solr_query-jgp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 21
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
- 3
|
10
|
+
version: 1.1.3
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Matthew Rudy Jacobs
|
14
|
+
- Mark Woods
|
15
|
+
- Tom Stuart
|
16
|
+
autorequire:
|
17
|
+
bindir: bin
|
18
|
+
cert_chain: []
|
19
|
+
|
20
|
+
date: 2011-05-18 00:00:00 Z
|
21
|
+
dependencies:
|
22
|
+
- !ruby/object:Gem::Dependency
|
23
|
+
name: rspec
|
24
|
+
prerelease: false
|
25
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ">="
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
hash: 3
|
31
|
+
segments:
|
32
|
+
- 0
|
33
|
+
version: "0"
|
34
|
+
type: :development
|
35
|
+
version_requirements: *id001
|
36
|
+
description: Build SOLR queries, properly escaped, with a nice API
|
37
|
+
email: development@jobsgopublic.com
|
38
|
+
executables: []
|
39
|
+
|
40
|
+
extensions: []
|
41
|
+
|
42
|
+
extra_rdoc_files:
|
43
|
+
- README
|
44
|
+
files:
|
45
|
+
- MIT-LICENSE
|
46
|
+
- Rakefile
|
47
|
+
- README
|
48
|
+
- lib/solr_query.rb
|
49
|
+
homepage: http://github.com/matthewrudy/solr_query
|
50
|
+
licenses: []
|
51
|
+
|
52
|
+
post_install_message:
|
53
|
+
rdoc_options:
|
54
|
+
- --main
|
55
|
+
- README
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
hash: 3
|
64
|
+
segments:
|
65
|
+
- 0
|
66
|
+
version: "0"
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
hash: 3
|
73
|
+
segments:
|
74
|
+
- 0
|
75
|
+
version: "0"
|
76
|
+
requirements: []
|
77
|
+
|
78
|
+
rubyforge_project:
|
79
|
+
rubygems_version: 1.7.2
|
80
|
+
signing_key:
|
81
|
+
specification_version: 3
|
82
|
+
summary: a ruby library designed to make building nested Solr queries simple and standardized.
|
83
|
+
test_files: []
|
84
|
+
|