solr_search_sanitizer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,8 @@
1
+ SolrSearchSanitizer
2
+ ===================
3
+
4
+ A gem that sanitizes Solr search queries containing advanced query-syntax characters and nested parentheses. It assumes that e-commerce stores do not expose Solr's advanced query syntax to their users, so user-entered queries must be sanitized to prevent syntax errors.
5
+
6
+ Copyright (c) 2011 minustehbare, released under the New BSD License
7
+
8
+
@@ -0,0 +1,121 @@
1
module SolrSearchSanitizer
  # Mixin with helpers that escape or strip Apache Lucene / Solr query-syntax
  # characters from a search string.
  #
  # Every helper takes the query String and returns a new String; the input is
  # never mutated. A nil (or false) query yields nil.
  module Sanitizer
    ####################
    #
    # All REGEXP definitions match characters or keywords that are part of the
    # Apache Lucene Query Parser Syntax
    #
    # BACKSLASH_REGEXP => matches the \ character, which is the escape
    #   character of the query syntax itself
    #
    # BOOLEAN_OPERATORS_REGEXP => matches boolean operators on multiple search
    #   terms: the whole words AND, OR, NOT and the symbols &&, ||, !
    #
    # BRACKET_REGEXP => matches parentheses and brackets and braces that group
    #   search clauses or define range searches
    #
    # WILDCARD_REGEXP => matches ? or * characters in the search that act as
    #   single or multiple character wildcards within a search term
    #
    # FUZZY_REGEXP => matches the ~ character at the end of a search term or phrase
    #
    # BOOST_REGEXP => matches the ^ character at the end of a search term or phrase
    #
    # BOOLEAN_MODIFIER_REGEXP => matches + or - characters at the start of
    #   search terms/phrases
    #
    # MISC_REGEXP => matches (currently) : and " characters that are used for
    #   defining phrases or field values
    #
    ####################

    BACKSLASH_REGEXP = /(\\)/
    # The symbolic operators are matched wherever they occur, including
    # directly next to a word character ("!foo", "a&&b").
    BOOLEAN_OPERATORS_REGEXP = /\b(AND|OR|NOT)\b|(&&|\|\||!)/
    BRACKET_REGEXP = /((\()|(\))|(\{)|(\})|(\[)|(\]))/
    WILDCARD_REGEXP = /((\*)|(\?))/
    # Alternative, stricter pattern (disabled): /[\w|\"](~)[\d]?/
    FUZZY_REGEXP = /(~)/
    # Alternative, stricter pattern (disabled): /[\w|\"](\^)[\d]?/
    BOOST_REGEXP = /(\^)/
    # Alternative, stricter pattern (disabled): /((\+)|(-))[(\w+(\b))|(\".+\")]/
    BOOLEAN_MODIFIER_REGEXP = /((\+)|(-))/
    MISC_REGEXP = /(\")|(:)/

    # Replacement for each text BOOLEAN_OPERATORS_REGEXP can match: symbols are
    # backslash-escaped, keywords are lower-cased.
    BOOLEAN_OPERATOR_REPLACEMENTS = {
      '&&' => '\\&\\&',
      '||' => '\\|\\|',
      '!' => '\\!',
      'AND' => 'and',
      'NOT' => 'not',
      'OR' => 'or'
    }.freeze

    # Replacement for each text MISC_REGEXP can match: quotes are escaped,
    # colons are dropped (see #escape_misc).
    MISC_REPLACEMENTS = { '"' => '\\"', ':' => '' }.freeze

    # Doubles every backslash so it is read as a literal character.
    def escape_backslashes(query)
      return unless query

      query.gsub(BACKSLASH_REGEXP) { |match| "\\#{match}" }
    end

    # Deletes every backslash.
    def remove_backslashes(query)
      return unless query

      query.gsub(BACKSLASH_REGEXP, '')
    end

    # Escapes &&, || and ! with backslashes and lower-cases AND, OR and NOT.
    def escape_boolean_operators(query)
      return unless query

      query.gsub(BOOLEAN_OPERATORS_REGEXP, BOOLEAN_OPERATOR_REPLACEMENTS)
    end

    # Deletes AND, OR, NOT, &&, || and !.
    def remove_boolean_operators(query)
      return unless query

      query.gsub(BOOLEAN_OPERATORS_REGEXP, '')
    end

    # Prefixes every ( ) { } [ ] with a backslash.
    def escape_brackets(query)
      return unless query

      query.gsub(BRACKET_REGEXP) { |match| "\\#{match}" }
    end

    # Deletes every ( ) { } [ ].
    def remove_brackets(query)
      return unless query

      query.gsub(BRACKET_REGEXP, '')
    end

    # Prefixes every * and ? with a backslash.
    def escape_wildcards(query)
      return unless query

      query.gsub(WILDCARD_REGEXP) { |match| "\\#{match}" }
    end

    # Deletes every * and ?.
    def remove_wildcards(query)
      return unless query

      query.gsub(WILDCARD_REGEXP, '')
    end

    # Prefixes every ~ with a backslash.
    def escape_fuzzy(query)
      return unless query

      query.gsub(FUZZY_REGEXP) { |match| "\\#{match}" }
    end

    # Deletes every ~.
    def remove_fuzzy(query)
      return unless query

      query.gsub(FUZZY_REGEXP, '')
    end

    # Prefixes every ^ with a backslash.
    def escape_boost(query)
      return unless query

      query.gsub(BOOST_REGEXP) { |match| "\\#{match}" }
    end

    # Deletes every ^.
    def remove_boost(query)
      return unless query

      query.gsub(BOOST_REGEXP, '')
    end

    # Prefixes every + and - with a backslash.
    def escape_boolean_modifiers(query)
      return unless query

      query.gsub(BOOLEAN_MODIFIER_REGEXP) { |match| "\\#{match}" }
    end

    # Deletes every + and -.
    def remove_boolean_modifiers(query)
      return unless query

      query.gsub(BOOLEAN_MODIFIER_REGEXP, '')
    end

    # Escapes " with a backslash and removes : entirely.
    #
    # : (colon) characters in SOLR searches are modified by acts_as_solr_reloaded
    # after they are escaped
    #
    # without sanitization, some_field:some_value => some_field_t:some_value from acts_as_solr
    # with sanitization,    some_field:some_value => some_field\_t:some_value
    #
    # for this reason we simply remove the : from the search entirely since it cannot be escaped
    def escape_misc(query)
      return unless query

      query.gsub(MISC_REGEXP, MISC_REPLACEMENTS)
    end

    # Deletes every " and :.
    def remove_misc(query)
      return unless query

      query.gsub(MISC_REGEXP, '')
    end

    # Runs every escape_* helper over the query and returns the result.
    #
    # Backslashes are escaped first: all later steps insert backslashes of
    # their own, and a backslash already present in the input would otherwise
    # cancel the escape added in front of the character that follows it.
    def escape_special_characters(query)
      escaped = escape_backslashes(query)
      escaped = escape_boolean_operators(escaped)
      escaped = escape_brackets(escaped)
      escaped = escape_wildcards(escaped)
      escaped = escape_fuzzy(escaped)
      escaped = escape_boost(escaped)
      escaped = escape_boolean_modifiers(escaped)
      escape_misc(escaped)
    end
  end
end
@@ -0,0 +1,19 @@
1
# Gem specification for solr_search_sanitizer.
Gem::Specification.new do |s|
  s.platform    = Gem::Platform::RUBY
  s.name        = 'solr_search_sanitizer'
  s.version     = '0.1.0'
  s.summary     = 'Provides sanitizer methods for SOLR queries.'
  s.description = 'Provides sanitizer methods for SOLR queries.'
  # The sanitizer calls String#gsub with a Hash replacement, which is only
  # available from Ruby 1.9 onwards.
  s.required_ruby_version = '>= 1.9'
  # The README releases the gem under the New BSD License.
  s.license     = 'BSD-3-Clause'

  s.author      = 'minustehbare'
  s.email       = 'minustehbare@gmail.com'

  # Package every file tracked by git; test files are the tracked files
  # directly under test/, spec/ or features/.
  s.files        = `git ls-files`.split("\n")
  s.test_files   = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.require_path = 'lib'
  s.requirements << 'none'
end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: solr_search_sanitizer
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.1.0
6
+ platform: ruby
7
+ authors:
8
+ - minustehbare
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-07-14 00:00:00 -04:00
14
+ default_executable:
15
+ dependencies: []
16
+
17
+ description: Provides sanitizer methods for SOLR queries.
18
+ email: minustehbare@gmail.com
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files: []
24
+
25
+ files:
26
+ - README.md
27
+ - lib/solr_search_sanitizer/sanitizer.rb
28
+ - solr_search_sanitizer.gemspec
29
+ has_rdoc: true
30
+ homepage:
31
+ licenses: []
32
+
33
+ post_install_message:
34
+ rdoc_options: []
35
+
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 1.8.7
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ none: false
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: "0"
50
+ requirements:
51
+ - none
52
+ rubyforge_project:
53
+ rubygems_version: 1.6.2
54
+ signing_key:
55
+ specification_version: 3
56
+ summary: Provides sanitizer methods for SOLR queries.
57
+ test_files: []
58
+