solr_query-jgp 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +46 -0
  3. data/Rakefile +22 -0
  4. data/lib/solr_query.rb +153 -0
  5. metadata +84 -0
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 [Matthew Rudy Jacobs]
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,46 @@
1
+ SolrQuery
2
+ =========
3
+
4
+ SolrQuery is a ruby library designed to make building nested Solr queries simple and standardized.
5
+
6
+ It does everything for you,
7
+ it deals with escaping characters,
8
+ matching ActiveRecord objects by id.
9
+
10
+ It'll take an array of strings,
11
+ an array of ActiveRecords.
12
+
13
+ Give it a Hash or a Range, and it'll sort out lucene range queries for you!
14
+
15
+ Example
16
+ =======
17
+
18
+ build a query for solr;
19
+
20
+ SolrQuery.build(:keyword => "Feather duster")
21
+ #=> 'text:"feather duster"~1000'
22
+
23
+ SolrQuery.build(:keyword => "clean", :organisation => [organisation1, organisation2])
24
+ #=> "clean AND organisation:(275 OR 6534)"
25
+
26
+ SolrQuery.build(:colour => ["red", "pink"], :item_type => ["Toy", "Train"])
27
+ #=> "colour:(red OR pink) AND item_type:(Toy OR Train)"
28
+
29
+ or you can specify a different magical key for keyword;
30
+
31
+ SolrQuery.build({:keyword => "old one", :new_keyword => "new one"}, {:keyword_key => :new_keyword})
32
+ #=> 'text:"new one"~1000 AND keyword:(old one)'
33
+
34
+ if you need to do range queries;
35
+
36
+ SolrQuery.build(:salary => {:min => "010000", :max => "050000"})
37
+ #=> "salary:([010000 TO 050000])"
38
+
39
+ SolrQuery.build(:salary => "010000".."050000")
40
+ #=> "salary:([010000 TO 050000])"
41
+
42
+ SolrQuery.build(:surname => {:min => "jacobs"})
43
+ #=> "surname:([jacobs TO *])"
44
+
45
+
46
+ Copyright (c) 2008 [Matthew Rudy Jacobs], released under the MIT license
data/Rakefile ADDED
@@ -0,0 +1,22 @@
# Rake tasks for the solr_query gem: running the specs (the default task)
# and generating RDoc documentation.
#
# NOTE(review): 'spec/rake/spectask' and 'rake/rdoctask' look like the
# RSpec 1.x and old Rake entry points -- confirm they are available in the
# toolchain this gem is built with.
require 'rake'
require 'spec'
require 'spec/rake/spectask'
require 'rake/rdoctask'

desc 'Default: run the specs.'
task :default => :spec

desc 'Run specs for the solr_query plugin'
Spec::Rake::SpecTask.new(:spec) do |t|
  # The options file path is wrapped in literal double quotes before being
  # handed to the spec runner.
  t.spec_opts = ['--options', "\"spec/spec.opts\""]
  t.spec_files = FileList['spec/**/*_spec.rb']
end

desc 'Generate documentation for the solr_query plugin.'
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'SolrQuery'
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.rdoc_files.include('README')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/lib/solr_query.rb ADDED
@@ -0,0 +1,153 @@
1
+ unless nil.respond_to?(:blank?)
2
+ require File.join(File.dirname(__FILE__), "blank")
3
+ end
4
+
# Builds Solr/Lucene query strings from plain Ruby values (strings, arrays,
# hashes, ranges and ActiveRecord objects), escaping Lucene special characters.
module SolrQuery
  # The Lucene documentation declares special characters to be:
  #   + - && || ! ( ) { } [ ] ^ " ~ * ? : \
  # A semi-colon is escaped as well.
  # note: this pattern comes from Jeremy Voorhis's Lucene query builder at
  # http://github.com/jvoorhis/lucene_query
  RE_ESCAPE_LUCENE = /
    ( [-+!\(\)\{\}\[\]^"~*?:;\\] # A special character
    | &&                         # Boolean &&
    | \|\|                       # Boolean ||
    )
  /x

  # A boolean operator that is the last whole word of a value. It is
  # lower-cased by escape_solr_string so it cannot act as a dangling operator.
  # The word boundary keeps words such as "BRAND" untouched.
  ENDING_KEYWORDS = /\b(AND|OR|NOT)\z/

  class << self
    # Build a query for Solr.
    #
    # conditions - Hash of field => value. Values may be a String, an Array
    #              (OR-ed together), a Hash with :min / :max or a Range (range
    #              query), an ActiveRecord object (matched by id) or anything
    #              responding to to_s. nil values are skipped.
    # opts       - Hash of options:
    #              :keyword_key       - the "magical" condition key holding the
    #                                   free-text keyword (default :keyword)
    #              :keyword_boost     - field in which each keyword is
    #                                   additionally searched (default: none)
    #              :keyword_proximity - proximity used for multi-word keywords
    #                                   (default 1000), see
    #                                   http://wiki.apache.org/solr/SolrRelevancyCookbook
    #              For compatibility with the README-style call, a non-Hash
    #              value is taken to be the :keyword_key itself.
    #
    # Returns the query String ("" when there is nothing to search for).
    #
    #   SolrQuery.build(:keyword => "Feather duster")
    #   # => 'text:"feather duster"~1000'
    #
    #   SolrQuery.build(:keyword => "clean", :organisation => [organisation1, organisation2])
    #   # => "clean AND organisation:(275 OR 6534)"
    #
    #   SolrQuery.build(:colour => ["red", "pink"], :item_type => ["Toy", "Train"])
    #   # => "colour:(red OR pink) AND item_type:(Toy OR Train)"
    #
    # or you can specify a different magical key for keyword;
    #
    #   SolrQuery.build({:keyword => "old one", :new_keyword => "new one"}, {:keyword_key => :new_keyword})
    #   # => 'text:"new one"~1000 AND keyword:(old one)'
    #
    # if you need to do range queries;
    #
    #   SolrQuery.build(:salary => {:min => "010000", :max => "050000"})
    #   # => "salary:([010000 TO 050000])"
    #
    #   SolrQuery.build(:salary => "010000".."050000")
    #   # => "salary:([010000 TO 050000])"
    #
    #   SolrQuery.build(:surname => {:min => "jacobs"})
    #   # => "surname:([jacobs TO *])"
    def build(conditions = {}, opts = {})
      conditions = conditions.dup # shallow copy: deleting the keyword must not touch the caller's hash
      opts = opts.is_a?(Hash) ? opts.dup : { :keyword_key => opts }
      opts[:keyword_key] ||= :keyword
      opts[:keyword_proximity] ||= 1000

      # keyword is magical: it is searched as free text rather than as field:(value)
      keyword = solr_value(conditions.delete(opts[:keyword_key]), true, false)
      query_parts = keyword_query_parts(keyword, opts)

      conditions.each do |field, value|
        query_parts << "#{field}:(#{solr_value(value)})" unless value.nil?
      end

      query_parts.join(" AND ")
    end

    protected

    # Turns the already normalised keyword into zero, one or two query parts.
    def keyword_query_parts(keyword, opts)
      # solr_value always returns a stripped String, so empty? is sufficient here
      return [] if keyword.empty?

      # backwards compatibility - don't mess with keywords that already contain
      # boolean operators (which effectively means keywords provided as an array)
      return [keyword] if keyword.include?(' OR ') || keyword.include?(' AND ')

      parts = keyword.include?(' ') ? proximity_phrases(keyword, opts[:keyword_proximity]) : [keyword]

      if opts[:keyword_boost]
        # Index time boosting not working, so boost the score for matches in the
        # boost field by explicitly looking for each keyword in that field
        boosted = keyword.split(/\s+/).map { |word| "#{opts[:keyword_boost]}:#{word}" }.join(' AND ')
        parts[0] = "(#{parts[0]} OR (#{boosted}))"
      end
      parts
    end

    # Finds multiple keywords near each other, allowing for a phrase that ends
    # with "in <some location>": words before the first " in " must be near
    # each other, words after it are treated as a second (location) phrase.
    def proximity_phrases(keyword, max_proximity)
      phrases = keyword.split(' in ')
      # with both general and location keywords, each set should be nearer each other
      proximity = max_proximity.to_i / phrases.size
      general = phrases.shift
      parts = ["text:\"#{general}\"~#{proximity}"]
      parts << "text:\"#{phrases.join(' ')}\"~#{proximity}" unless phrases.empty?
      parts
    end

    # Converts any supported value into its escaped Solr representation.
    #
    # object   - the value to convert
    # downcase - lower-case the value (boolean operators are preserved)
    # clean    - strip Lucene special characters instead of escaping them
    #
    # Returns a String. The caller's object is never modified.
    def solr_value(object, downcase = false, clean = false)
      case object
      when Array
        solr_array(object, downcase, clean)
      when Hash, Range
        solr_range(object) # returned as-is to avoid escaping the * and the brackets
      when String
        solr_string(object, downcase, clean)
      else
        raw = if defined?(ActiveRecord) && object.is_a?(ActiveRecord::Base)
                object.id.to_s
              else
                object.to_s
              end
        finish_value(raw, downcase, clean)
      end
    end

    # OR-joins the converted elements of an array. Each element is escaped
    # exactly once; the joined result is not escaped again, so nested ranges
    # and grouping parentheses survive.
    def solr_array(array, downcase, clean)
      # an empty array should be equivalent to "don't match anything"
      return finish_value("NIL", downcase, clean) if array.empty?

      array.map { |element| solr_value(element, downcase, clean) }.
        reject { |value| value.empty? }.
        join(" OR ")
    end

    # Converts a String. When downcasing, embedded " OR " / " AND " operators
    # are protected from being lower-cased and the result is wrapped in
    # parentheses.
    def solr_string(string, downcase, clean)
      return finish_value(string, downcase, clean) unless downcase && string =~ /\s(OR|AND)\s/

      value = solr_value(string.gsub(/\s(OR|AND)\s/, '__\1__'), true, clean)
      value = value.gsub('__or__', ' OR ').gsub('__and__', ' AND ')
      if !clean && value.include?('(') && value.include?(')') && value.count('(') == value.count(')')
        # equal number of opening and closing brackets, un-escape them
        # (it's not perfect, but it'll do)
        value = value.gsub(/\\+(\(|\))/, '\1')
      end
      "(#{value})"
    end

    # Applies the optional downcasing and the cleaning/escaping step.
    # Uses the non-mutating String#downcase so caller-owned (possibly frozen)
    # strings are left alone.
    def finish_value(string, downcase, clean)
      string = string.downcase if downcase
      clean ? clean_solr_string(string) : escape_solr_string(string)
    end

    # Builds a Lucene range "[min TO max]" from a Hash (:min / :max) or a
    # Range. A missing bound becomes "*". Range#begin / #end are used so that
    # endless and beginless ranges work as open-ended queries.
    def solr_range(object)
      if object.is_a?(Hash)
        min = object[:min]
        max = object[:max]
      else
        min = object.begin
        max = object.end
      end
      min = min ? solr_value(min) : "*"
      max = max ? solr_value(max) : "*"

      "[#{min} TO #{max}]"
    end

    # Removes Lucene special characters and squeezes whitespace.
    def clean_solr_string(str)
      str.gsub(RE_ESCAPE_LUCENE, '').gsub(/\s+/, ' ').strip
    end

    # Backslash-escapes Lucene special characters, squeezes whitespace and
    # lower-cases a trailing AND / OR / NOT. Stripping happens first so that a
    # keyword followed by whitespace is still neutralised.
    def escape_solr_string(str)
      escaped = str.gsub(RE_ESCAPE_LUCENE) { |m| "\\#{m}" }.gsub(/\s+/, ' ').strip
      escaped.gsub(ENDING_KEYWORDS) { |word| word.downcase }
    end
  end
end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: solr_query-jgp
3
+ version: !ruby/object:Gem::Version
4
+ hash: 21
5
+ prerelease:
6
+ segments:
7
+ - 1
8
+ - 1
9
+ - 3
10
+ version: 1.1.3
11
+ platform: ruby
12
+ authors:
13
+ - Matthew Rudy Jacobs
14
+ - Mark Woods
15
+ - Tom Stuart
16
+ autorequire:
17
+ bindir: bin
18
+ cert_chain: []
19
+
20
+ date: 2011-05-18 00:00:00 Z
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ name: rspec
24
+ prerelease: false
25
+ requirement: &id001 !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 3
31
+ segments:
32
+ - 0
33
+ version: "0"
34
+ type: :development
35
+ version_requirements: *id001
36
+ description: Build SOLR queries, properly escaped, with a nice API
37
+ email: development@jobsgopublic.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files:
43
+ - README
44
+ files:
45
+ - MIT-LICENSE
46
+ - Rakefile
47
+ - README
48
+ - lib/solr_query.rb
49
+ homepage: http://github.com/matthewrudy/solr_query
50
+ licenses: []
51
+
52
+ post_install_message:
53
+ rdoc_options:
54
+ - --main
55
+ - README
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ hash: 3
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ requirements: []
77
+
78
+ rubyforge_project:
79
+ rubygems_version: 1.7.2
80
+ signing_key:
81
+ specification_version: 3
82
+ summary: a ruby library designed to make building nested Solr queries simple and standardized.
83
+ test_files: []
84
+