lucene_query_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ -f doc
2
+ --colour
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in
# lucene_query_parser.gemspec.
gemspec

# Tooling used only while hacking on the gem.
group "development" do
  gem "guard"
  gem "guard-rspec"

  # These two gems are only installed on Mac OS X. RUBY_PLATFORM is used
  # because the bare PLATFORM constant no longer exists on Ruby 1.9+.
  if RUBY_PLATFORM =~ /darwin/
    gem "rb-fsevent"
    gem "growl_notify"
  end
end
data/Guardfile ADDED
@@ -0,0 +1,9 @@
1
# Re-run specs automatically while developing. Each watcher is declared once;
# registering the same pattern twice only repeats the same mapping.
guard 'rspec', :version => 2, :cli => "--format doc --colour" do
  # A changed spec file re-runs itself.
  watch(%r{^spec/.+_spec\.rb$})
  # A changed library file re-runs the spec with the matching path.
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
  # A changed spec helper re-runs the whole suite.
  watch('spec/spec_helper.rb') { "spec/" }
end
9
+
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # Lucene Query Parser
2
+
3
+ ## Synopsis
4
+
5
+ This library provides a basic parser that implements the majority of the
6
+ [Lucene Query Syntax](http://lucene.apache.org/java/2_9_1/queryparsersyntax.html)
7
+ "specification". Additionally, it includes a `check_lucene_query` script
8
+ to check for errors in a given query.
9
+
10
+ ## Requirements
11
+
12
+ * Ruby 1.8.7 (hasn't been tested elsewhere)
13
+ * [parslet](http://kschiess.github.com/parslet/)
14
+ * [rainbow](https://github.com/sickill/rainbow)
15
+ * RSpec 2 for development
16
+
17
+ ## Install
18
+
19
+ gem install lucene_query_parser
20
+
21
+ ## Usage
22
+
23
+ check_lucene_query --help
24
+
25
+ check_lucene_query query.txt
26
+
27
+ pbpaste | check_lucene_query -
28
+
29
+ ## Development
30
+
31
+ bundle
32
+ rake
33
+
34
+ ## Contributing
35
+
36
+ Fork, patch, test, and send a pull request.
37
+
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# The spec task uses RSpec's default file pattern; command-line options for
# the run live in the .rspec file at the project root.
desc "Run specs"
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` runs the specs.
desc 'Default: run specs.'
task :default => [:spec]
13
+
14
+
@@ -0,0 +1,75 @@
1
#!/usr/bin/env ruby
# check_lucene_query: validate the syntax of a Lucene query read from a file
# or from STDIN. When the query cannot be parsed, the parser's message is
# printed followed by the query with the failing character highlighted.
#
# Exit status: 0 when the query parses (or help was requested), non-zero
# otherwise.
require "lucene_query_parser"
require "rainbow"

# Returns line with the character at the 1-based column highlighted.
#
# line   - one line of the query (String, without its newline)
# column - 1-based column reported by the parser
#
# The line is returned untouched when the column does not point into it, so
# a missing or out-of-range position never raises. A column just past the
# end of the line is shown as a highlighted blank.
def highlight_column(line, column)
  index = column - 1
  return line if index < 0 || index > line.length

  marked = index < line.length ? line[index, 1] : " "
  line[0, index] +
    marked.color(:red).background(:yellow) +
    line[(index + 1)..-1].to_s
end

if ARGV.include?("-h") || ARGV.include?("--help")
  # Show the command as the user typed it rather than the installed path.
  name = File.basename($0)
  puts <<-help
NAME

    #{name} - check lucene query syntax

SYNOPSIS

    #{name} <filename>

ARGUMENTS

    <filename>
        a file containing a lucene query to parse and validate
        use '-' to read from STDIN

EXAMPLES

    Check a file containing a query:

        #{name} file_with_query.txt

    Check the contents of the clipboard (mac):

        pbpaste | #{name} -

  help
  exit # asking for help is not a failure
end

input_file = ARGV[0]
abort "please provide a filename to check or '-' for STDIN" unless input_file

begin
  input = input_file == "-" ? STDIN.read : File.read(input_file)
rescue SystemCallError => e
  # Missing or unreadable file: report it plainly instead of a stack trace.
  abort "could not read #{input_file}: #{e.message}"
end

location = LuceneQueryParser::Parser.new.error_location(input)

unless location
  puts "query ok".color(:green)
  exit
end

puts location[:message].to_s.color(:yellow)
puts

# Echo the query, marking the failing character on the reported line.
input.split("\n").each_with_index do |line, i|
  if i + 1 == location[:line]
    puts highlight_column(line, location[:column])
  else
    puts line
  end
end

puts

exit 1 # query not ok
75
+
@@ -0,0 +1,122 @@
1
+ module LuceneQueryParser
2
+ class Parser < Parslet::Parser
3
+
4
# Public: find and explain errors in a query, if any.
#
# query - the query String to check
#
# Returns nil if the query is parseable. Otherwise returns a Hash with:
#   :line    - 1-based line of the failure (0 when the message carries no
#              position)
#   :column  - 1-based column of the failure (0 when the message carries no
#              position)
#   :message - the text describing the failure
def error_location(query)
  parse query
  nil
rescue Parslet::ParseFailed => error
  cause = find_cause(root.error_tree)
  # Fall back to the exception's own message when the error tree yields
  # nothing printable, so :message is always a String.
  message = cause.is_a?(String) ? cause : error.message
  # Use a local MatchData rather than $1/$2, which can be stale when no
  # match was actually performed.
  position = message.match(/line (\d+) char (\d+)/)
  line, column = position ? position.captures.map { |n| n.to_i } : [0, 0]
  {:line => line, :column => column, :message => message}
end
18
+
19
# Recursively find a "real" cause within a Parslet error tree. "Real"
# causes contain line/column positions.
#
# node - an error tree node responding to #parslet, #cause and #children
#
# Returns the cause of the last failing node in the tree, or nil when no
# node in the subtree carries a cause.
def find_cause(node)
  return node.cause if node.parslet.cause

  # Go in reverse to find the last thing that failed rather than the first.
  node.children.reverse_each do |child|
    cause = find_cause(child)
    return cause if cause
  end

  # Explicit nil: the iterator itself returns the (truthy) children array,
  # which a recursive caller would mistake for a cause.
  nil
end
33
+
34
# ----- grammar definition -----
#
# `.as(:name)` tags the text matched by an atom so that it appears under
# that key in the parse result.

root(:expr)

# Operands separated by whitespace, each optionally joined to the previous
# one by AND/OR. Leading and trailing whitespace is allowed.
rule(:expr) do
  space.maybe >>
    operand >> (space >> (operator >> space >> operand | operand)).repeat >>
    space.maybe
end

# Binary operators between operands.
rule(:operator) { str('AND').as(:op) | str('OR').as(:op) }

# A group, field, term or phrase, optionally prefixed by +, - or NOT.
rule(:operand) do
  unary_operator.maybe >> (
    group |
    field |
    term |
    phrase
  )
end

# A run of word characters or apostrophes, optionally fuzzy or boosted.
rule(:term) { match["\\w'"].repeat(1).as(:term) >> (fuzzy | boost).maybe }

# Double-quoted text, optionally followed by a distance or a boost.
rule(:phrase) do
  str('"') >> match['^"'].repeat(1).as(:phrase) >> str('"') >>
    (distance | boost).maybe
end

# "~" followed by digits, attached to a phrase.
rule(:distance) { str('~') >> match['0-9'].repeat(1).as(:distance) }

# A parenthesised sub-expression.
rule(:group) do
  str('(') >> space.maybe >> expr.as(:group) >> space.maybe >> str(')')
end

# name:value, where the value is a term, phrase, group or range.
rule(:field) do
  match["\\w"].repeat(1).as(:field) >> str(':') >>
    (
      term | phrase | group |
      inclusive_range.as(:inclusive_range) |
      exclusive_range.as(:exclusive_range)
    )
end

# [from TO to]
rule(:inclusive_range) do
  str('[') >> space.maybe >>
    word.as(:from) >> space >> str('TO') >> space >> word.as(:to) >>
    space.maybe >> str(']')
end

# {from TO to}
rule(:exclusive_range) do
  str('{') >> space.maybe >>
    word.as(:from) >> space >> str('TO') >> space >> word.as(:to) >>
    space.maybe >> str('}')
end

# "+" (required), "-" (prohibited), or "NOT" followed by whitespace.
rule(:unary_operator) do
  str('+').as(:required) |
    str('-').as(:prohibited) |
    (str('NOT').as(:op) >> space)
end

# "~" optionally followed by "0.<digits>" or a single 0 or 1.
rule(:fuzzy) do
  str('~') >>
    ( str('0.') >> match['0-9'].repeat(1) | match['01'] ).maybe.as(:similarity)
end

# "^" followed by "0.<digits>" or by one or more digits.
rule(:boost) do
  str('^') >> (
    str('0.') >> match['0-9'].repeat(1) |
    match['0-9'].repeat(1)
  ).as(:boost)
end

# One or more word characters (used for range endpoints).
rule(:word) { match["\\w"].repeat(1) }

# One or more newline, space or tab characters.
rule(:space) { match["\n \t"].repeat(1) }
120
+
121
+ end
122
+ end
@@ -0,0 +1,3 @@
1
# Holds the gem's version number, which the gemspec reads when building
# the package.
module LuceneQueryParser
  VERSION = '0.0.1'
end
@@ -0,0 +1,9 @@
1
# Entry point for the gem: loads Parslet, then the version constant and the
# parser class (which subclasses Parslet::Parser and so needs Parslet first).
require "parslet"

require "lucene_query_parser/version"
require "lucene_query_parser/parser"

# Namespace for the Lucene query parser.
module LuceneQueryParser
end
@@ -0,0 +1,24 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for lucene_query_parser. The lib directory is added to
# the load path so the version constant can be required without the gem
# being installed.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.push(lib_dir)
require "lucene_query_parser/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "lucene_query_parser"
  spec.version  = LuceneQueryParser::VERSION
  spec.authors  = ["Nathan Witmer"]
  spec.email    = ["nwitmer@gmail.com"]
  spec.homepage = "https://github.com/aniero/lucene_query_parser"

  spec.summary     = "Lucene query parser and syntax checker"
  spec.description = "Parser class and syntax checking script for validating Lucene queries"

  spec.rubyforge_project = "lucene_query_parser"

  # Package contents are whatever git tracks; executables are the basenames
  # of the tracked files under bin/.
  tracked_files = `git ls-files`.split("\n")
  tracked_tests = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_bins  = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  spec.executables   = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Dependencies
  spec.add_runtime_dependency "parslet"
  spec.add_runtime_dependency "rainbow"
  spec.add_development_dependency "rspec", "~> 2.5.0"
end
@@ -0,0 +1,195 @@
1
+ require "spec_helper"
2
+
3
+ describe LuceneQueryParser::Parser do
4
+ let(:parser) { LuceneQueryParser::Parser.new }
5
+
6
+ describe "#parse" do
7
+ it "parses a term" do
8
+ should parse("foo").as({:term => "foo"})
9
+ end
10
+
11
+ it "parses a phrase" do
12
+ should parse('"foo bar"').as({:phrase => "foo bar"})
13
+ end
14
+
15
it "parses a term and a phrase" do
  # `should` is required here: without it the matcher is only constructed
  # and the example passes without checking anything.
  should parse(%q(foo "stuff and things")).as [
    {:term => "foo"},
    {:phrase => "stuff and things"}
  ]
end
21
+
22
+ it "parses a phrase and two terms" do
23
+ should parse(%q("foo bar" isn't one)).as [
24
+ {:phrase => "foo bar"},
25
+ {:term => "isn't"},
26
+ {:term => "one"}
27
+ ]
28
+ end
29
+
30
+ it "parses multiple phrases" do
31
+ should parse(%q("foo bar"~3 "mumble stuff"~5 "blah blah")).as [
32
+ {:phrase => "foo bar", :distance => "3"},
33
+ {:phrase => "mumble stuff", :distance => "5"},
34
+ {:phrase => "blah blah"}
35
+ ]
36
+ end
37
+
38
+ it "parses a nearness query" do
39
+ should parse(%q("foo bar"~2)).as(
40
+ {:phrase => "foo bar", :distance => "2"}
41
+ )
42
+ end
43
+
44
+ it "parses a paren grouping" do
45
+ should parse(%q((foo bar))).as(
46
+ {:group => [{:term => "foo"}, {:term => "bar"}]}
47
+ )
48
+ end
49
+
50
+ it "parses nested paren groups" do
51
+ should parse(%q((foo (bar (baz))))).as(
52
+ {:group => [
53
+ {:term => "foo"},
54
+ {:group => [
55
+ {:term => "bar"},
56
+ {:group => {:term => "baz"}}
57
+ ]}
58
+ ]}
59
+ )
60
+ end
61
+
62
+ it "parses a required term" do
63
+ should parse("+foo").as({:term => "foo", :required => "+"})
64
+ end
65
+
66
+ it "parses a prohibited term" do
67
+ should parse("-foo").as({:term => "foo", :prohibited => "-"})
68
+ end
69
+
70
+ it "parses prohibited groups and phrases" do
71
+ should parse(%q(+(foo bar) -"mumble stuff")).as [
72
+ {:group => [{:term => "foo"}, {:term => "bar"}], :required => "+"},
73
+ {:phrase => "mumble stuff", :prohibited => "-"}
74
+ ]
75
+ end
76
+
77
+ it "ignores leading spaces" do
78
+ should parse(" foo bar").as [{:term => "foo"}, {:term => "bar"}]
79
+ end
80
+
81
+ it "ignores trailing spaces" do
82
+ should parse("foo bar ").as [{:term => "foo"}, {:term => "bar"}]
83
+ end
84
+
85
# Previously an empty example that duplicated the name of the one above and
# passed without asserting anything. NOTE(review): the original intent is
# not recoverable from the file; covering both ends at once is a guess.
it "ignores leading and trailing spaces" do
  should parse(" foo bar ").as [{:term => "foo"}, {:term => "bar"}]
end
88
+
89
+ it "parses AND groupings" do
90
+ should parse(%q(foo AND bar)).as [
91
+ {:term => "foo"},
92
+ {:op => "AND", :term => "bar"}
93
+ ]
94
+ end
95
+
96
+ it "parses a sequence of AND and OR" do
97
+ should parse(%q(foo AND bar OR baz OR mumble)).as [
98
+ {:term => "foo"},
99
+ {:op => "AND", :term => "bar"},
100
+ {:op => "OR", :term => "baz"},
101
+ {:op => "OR", :term => "mumble"}
102
+ ]
103
+ end
104
+
105
+ it "parses NOTs" do
106
+ should parse("foo NOT bar").as [
107
+ {:term => "foo"},
108
+ {:term => "bar", :op => "NOT"}
109
+ ]
110
+ end
111
+
112
+ it "parses field:value" do
113
+ should parse("title:foo").as(
114
+ {:field => "title", :term => "foo"}
115
+ )
116
+ end
117
+
118
+ it 'parses field:"a phrase"' do
119
+ should parse('title:"a phrase"').as(
120
+ {:field => "title", :phrase => "a phrase"}
121
+ )
122
+ end
123
+
124
+ it "parses field:(foo AND bar)" do
125
+ should parse('title:(foo AND bar)').as(
126
+ {:field => "title", :group => [
127
+ {:term => "foo"},
128
+ {:op => "AND", :term => "bar"}
129
+ ]}
130
+ )
131
+ end
132
+
133
+ it "parses fuzzy terms" do
134
+ should parse('fuzzy~').as(
135
+ {:term => "fuzzy", :similarity => nil}
136
+ )
137
+ end
138
+
139
+ it "parses a fuzzy similarity of 0" do
140
+ should parse('fuzzy~0').as(
141
+ {:term => "fuzzy", :similarity => "0"}
142
+ )
143
+ end
144
+
145
+ it "parses a fuzzy similarity of 1" do
146
+ should parse('fuzzy~1').as(
147
+ {:term => "fuzzy", :similarity => "1"}
148
+ )
149
+ end
150
+
151
+ it "parses a fuzzy similarity of 0.8" do
152
+ should parse('fuzzy~0.8').as(
153
+ {:term => "fuzzy", :similarity => "0.8"}
154
+ )
155
+ end
156
+
157
+ it { should parse('year:[2010 TO 2011]').as(
158
+ {:field => "year", :inclusive_range => {:from => "2010", :to => "2011"}}
159
+ ) }
160
+
161
+ it { should parse('year:{2009 TO 2012}').as(
162
+ {:field => "year", :exclusive_range => {:from => "2009", :to => "2012"}}
163
+ ) }
164
+
165
+ it { should parse('boosted^1').as({:term => "boosted", :boost => "1"})}
166
+ it { should parse('boosted^0.1').as({:term => "boosted", :boost => "0.1"})}
167
+
168
+ it { should parse('boosted^10 normal').as([
169
+ {:term => "boosted", :boost => "10"},
170
+ {:term => "normal"}
171
+ ])}
172
+
173
+ it { should parse('"boosted phrase"^10 "normal phrase"').as([
174
+ {:phrase => "boosted phrase", :boost => "10"},
175
+ {:phrase => "normal phrase"}
176
+ ])}
177
+
178
+ end
179
+
180
+ describe "#error_location" do
181
+ let(:parser) { LuceneQueryParser::Parser.new }
182
+
183
+ it "returns nil for a valid query" do
184
+ parser.error_location("valid query").should be_nil
185
+ end
186
+
187
+ it "returns a hash with the line and column for an invalid query" do
188
+ error = parser.error_location("invalid^ query")
189
+ error[:line].should == 1
190
+ error[:column].should == 8
191
+ error[:message].should =~ /Expected/
192
+ end
193
+ end
194
+
195
+ end
@@ -0,0 +1,6 @@
1
+ require "lucene_query_parser"
2
+
3
+ require "parslet/rig/rspec"
4
+
5
+ RSpec.configure do |config|
6
+ end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lucene_query_parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Nathan Witmer
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-08-24 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: parslet
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: rainbow
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 3
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ type: :runtime
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ~>
55
+ - !ruby/object:Gem::Version
56
+ hash: 27
57
+ segments:
58
+ - 2
59
+ - 5
60
+ - 0
61
+ version: 2.5.0
62
+ type: :development
63
+ version_requirements: *id003
64
+ description: Parser class and syntax checking script for validating Lucene queries
65
+ email:
66
+ - nwitmer@gmail.com
67
+ executables:
68
+ - check_lucene_query
69
+ extensions: []
70
+
71
+ extra_rdoc_files: []
72
+
73
+ files:
74
+ - .gitignore
75
+ - .rspec
76
+ - Gemfile
77
+ - Guardfile
78
+ - README.md
79
+ - Rakefile
80
+ - bin/check_lucene_query
81
+ - lib/lucene_query_parser.rb
82
+ - lib/lucene_query_parser/parser.rb
83
+ - lib/lucene_query_parser/version.rb
84
+ - lucene_query_parser.gemspec
85
+ - spec/lucene_query_parser/parser_spec.rb
86
+ - spec/spec_helper.rb
87
+ homepage: https://github.com/aniero/lucene_query_parser
88
+ licenses: []
89
+
90
+ post_install_message:
91
+ rdoc_options: []
92
+
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ hash: 3
101
+ segments:
102
+ - 0
103
+ version: "0"
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ hash: 3
110
+ segments:
111
+ - 0
112
+ version: "0"
113
+ requirements: []
114
+
115
+ rubyforge_project: lucene_query_parser
116
+ rubygems_version: 1.8.7
117
+ signing_key:
118
+ specification_version: 3
119
+ summary: Lucene query parser and syntax checker
120
+ test_files:
121
+ - spec/lucene_query_parser/parser_spec.rb
122
+ - spec/spec_helper.rb