lucene_query_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ -f doc
2
+ --colour
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in lucene_query_parser.gemspec
4
+ gemspec
5
+
6
+ group "development" do
7
+ gem "guard"
8
+ gem "guard-rspec"
9
+
10
+ if PLATFORM =~ /darwin/
11
+ gem "rb-fsevent"
12
+ gem "growl_notify"
13
+ end
14
+ end
data/Guardfile ADDED
@@ -0,0 +1,9 @@
1
+ guard 'rspec', :version => 2, :cli => "--format doc --colour" do
2
+ watch(%r{^spec/.+_spec\.rb$})
3
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
4
+ watch('spec/spec_helper.rb') { "spec/" }
5
+ watch(%r{^spec/.+_spec\.rb$})
6
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
7
+ watch('spec/spec_helper.rb') { "spec/" }
8
+ end
9
+
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # Lucene Query Parser
2
+
3
+ ## Synopsis
4
+
5
+ This library provides a basic parser that implements the majority of the
6
+ [Lucene Query Syntax](http://lucene.apache.org/java/2_9_1/queryparsersyntax.html)
7
+ "specification". Additionally, it includes a `check_lucene_query` script
8
+ to check for errors in a given query.
9
+
10
+ ## Requirements
11
+
12
+ * Ruby 1.8.7 (hasn't been tested elsewhere)
13
+ * [parslet](http://kschiess.github.com/parslet/)
14
+ * [rainbow](https://github.com/sickill/rainbow)
15
+ * Rspec 2 for development
16
+
17
+ ## Install
18
+
19
+ gem install lucene_query_parser
20
+
21
+ ## Usage
22
+
23
+ check_lucene_query --help
24
+
25
+ check_lucene_query query.txt
26
+
27
+ pbpaste | check_lucene_query -
28
+
29
+ ## Development
30
+
31
+ bundle
32
+ rake
33
+
34
+ ## Contributing
35
+
36
+ Fork, patch, test, and send a pull request.
37
+
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rspec/core/rake_task'
4
+
5
+ desc 'Default: run specs.'
6
+ task :default => :spec
7
+
8
+ desc "Run specs"
9
+ RSpec::Core::RakeTask.new do |t|
10
+ # t.pattern = "./spec/**/*_spec.rb" # don't need this, it's default.
11
+ # Put spec opts in a file named .rspec in root
12
+ end
13
+
14
+
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env ruby
2
+ require "lucene_query_parser"
3
+ require "rainbow"
4
+
5
+ if ARGV.include?("-h") || ARGV.include?("--help")
6
+ name = __FILE__
7
+ puts <<-help
8
+ NAME
9
+
10
+ #{name} - check lucene query syntax
11
+
12
+ SYNOPSIS
13
+
14
+ #{name} <filename>
15
+
16
+ ARGUMENTS
17
+
18
+ <filename>
19
+ a file containing a lucene query to parse and validate
20
+ use '-' to read from STDIN
21
+
22
+ EXAMPLES
23
+
24
+ Check a file containing a query:
25
+
26
+ #{name} file_with_query.txt
27
+
28
+ Check the contents of the clipboard (mac):
29
+
30
+ pbpaste | #{name} -
31
+
32
+ help
33
+ exit -1
34
+ end
35
+
36
+ input = nil
37
+
38
+ if input_file = ARGV[0]
39
+ if input_file == "-"
40
+ input = STDIN.read
41
+ else
42
+ input = File.read input_file
43
+ end
44
+ else
45
+ abort "please provide a filename to check or '-' for STDIN"
46
+ end
47
+
48
+ parser = LuceneQueryParser::Parser.new
49
+
50
+
51
+ location = parser.error_location(input)
52
+ if location
53
+ puts location[:message].color(:yellow)
54
+ puts
55
+
56
+ lines = input.split("\n")
57
+ lines.each_with_index do |line, i|
58
+ if i + 1 == location[:line]
59
+ col = location[:column]
60
+ print line[0,col-1]
61
+ print line[col-1, 1].color(:red).background(:yellow)
62
+ puts line[col..-1]
63
+ else
64
+ puts line
65
+ end
66
+ end
67
+
68
+ puts
69
+
70
+ exit -1 # query not ok
71
+
72
+ else
73
+ puts "query ok".color(:green)
74
+ end
75
+
@@ -0,0 +1,122 @@
1
+ module LuceneQueryParser
2
+ class Parser < Parslet::Parser
3
+
4
+ # Public: find and explain errors in a query, if any
5
+ #
6
+ # query - the query to check
7
+ #
8
+ # Returns nil if the query is parseable, or a hash containing information
9
+ # about the invalid query if not.
10
+ def error_location(query)
11
+ parse query
12
+ nil
13
+ rescue Parslet::ParseFailed => error
14
+ cause = find_cause root.error_tree
15
+ cause =~ /line (\d+) char (\d+)/
16
+ {:line => $1.to_i, :column => $2.to_i, :message => cause}
17
+ end
18
+
19
+ # Recursively find a "real" cause within a Parslet error tree. "Real"
20
+ # causes contain line/column positions.
21
+ def find_cause(node)
22
+ if node.parslet.cause
23
+ node.cause
24
+ else
25
+ # go in reverse to find the last thing that failed rather than the first
26
+ node.children.reverse.each do |child|
27
+ if cause = find_cause(child)
28
+ return cause
29
+ end
30
+ end
31
+ end
32
+ end
33
+
34
+ # ----- grammar definition -----
35
+
36
+ root :expr
37
+
38
+ rule :expr do
39
+ space.maybe >>
40
+ operand >> (space >> (operator >> space >> operand | operand)).repeat >>
41
+ space.maybe
42
+ end
43
+
44
+ rule :operator do
45
+ str('AND').as(:op) | str('OR').as(:op)
46
+ end
47
+
48
+ rule :operand do
49
+ unary_operator.maybe >> (
50
+ group |
51
+ field |
52
+ term |
53
+ phrase
54
+ )
55
+ end
56
+
57
+ rule :term do
58
+ match["\\w'"].repeat(1).as(:term) >> (fuzzy | boost).maybe
59
+ end
60
+
61
+ rule :phrase do
62
+ str('"') >> match['^"'].repeat(1).as(:phrase) >> str('"') >>
63
+ (distance | boost).maybe
64
+ end
65
+
66
+ rule :distance do
67
+ str('~') >> match['0-9'].repeat(1).as(:distance)
68
+ end
69
+
70
+ rule :group do
71
+ str('(') >> space.maybe >> expr.as(:group) >> space.maybe >> str(')')
72
+ end
73
+
74
+ rule :field do
75
+ match["\\w"].repeat(1).as(:field) >> str(':') >>
76
+ (
77
+ term | phrase | group |
78
+ inclusive_range.as(:inclusive_range) |
79
+ exclusive_range.as(:exclusive_range)
80
+ )
81
+ end
82
+
83
+ rule :inclusive_range do
84
+ str('[') >> space.maybe >>
85
+ word.as(:from) >> space >> str('TO') >> space >> word.as(:to) >>
86
+ space.maybe >> str(']')
87
+ end
88
+
89
+ rule :exclusive_range do
90
+ str('{') >> space.maybe >>
91
+ word.as(:from) >> space >> str('TO') >> space >> word.as(:to) >>
92
+ space.maybe >> str('}')
93
+ end
94
+
95
+ rule :unary_operator do
96
+ str('+').as(:required) |
97
+ str('-').as(:prohibited) |
98
+ (str('NOT').as(:op) >> space)
99
+ end
100
+
101
+ rule :fuzzy do
102
+ str('~') >>
103
+ ( str('0.') >> match['0-9'].repeat(1) | match['01'] ).maybe.as(:similarity)
104
+ end
105
+
106
+ rule :boost do
107
+ str('^') >> (
108
+ str('0.') >> match['0-9'].repeat(1) |
109
+ match['0-9'].repeat(1)
110
+ ).as(:boost)
111
+ end
112
+
113
+ rule :word do
114
+ match["\\w"].repeat(1)
115
+ end
116
+
117
+ rule :space do
118
+ match["\n \t"].repeat(1)
119
+ end
120
+
121
+ end
122
+ end
@@ -0,0 +1,3 @@
1
+ module LuceneQueryParser
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,9 @@
1
+ require "lucene_query_parser/version"
2
+
3
+ require "parslet"
4
+
5
+ module LuceneQueryParser
6
+ # Your code goes here...
7
+
8
+ require "lucene_query_parser/parser"
9
+ end
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "lucene_query_parser/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "lucene_query_parser"
7
+ s.version = LuceneQueryParser::VERSION
8
+ s.authors = ["Nathan Witmer"]
9
+ s.email = ["nwitmer@gmail.com"]
10
+ s.homepage = "https://github.com/aniero/lucene_query_parser"
11
+ s.summary = %q{Lucene query parser and syntax checker}
12
+ s.description = %q{Parser class and syntax checking script for validating Lucene queries}
13
+
14
+ s.rubyforge_project = "lucene_query_parser"
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+
21
+ s.add_runtime_dependency "parslet"
22
+ s.add_runtime_dependency "rainbow"
23
+ s.add_development_dependency "rspec", "~> 2.5.0"
24
+ end
@@ -0,0 +1,195 @@
1
+ require "spec_helper"
2
+
3
+ describe LuceneQueryParser::Parser do
4
+ let(:parser) { LuceneQueryParser::Parser.new }
5
+
6
+ describe "#parse" do
7
+ it "parses a term" do
8
+ should parse("foo").as({:term => "foo"})
9
+ end
10
+
11
+ it "parses a phrase" do
12
+ should parse('"foo bar"').as({:phrase => "foo bar"})
13
+ end
14
+
15
+ it "parses a term and a phrase" do
16
+ parse(%q(foo "stuff and things")).as [
17
+ {:term => "foo"},
18
+ {:phrase => "stuff and things"}
19
+ ]
20
+ end
21
+
22
+ it "parses a phrase and two terms" do
23
+ should parse(%q("foo bar" isn't one)).as [
24
+ {:phrase => "foo bar"},
25
+ {:term => "isn't"},
26
+ {:term => "one"}
27
+ ]
28
+ end
29
+
30
+ it "parses multiple phrases" do
31
+ should parse(%q("foo bar"~3 "mumble stuff"~5 "blah blah")).as [
32
+ {:phrase => "foo bar", :distance => "3"},
33
+ {:phrase => "mumble stuff", :distance => "5"},
34
+ {:phrase => "blah blah"}
35
+ ]
36
+ end
37
+
38
+ it "parses a nearness query" do
39
+ should parse(%q("foo bar"~2)).as(
40
+ {:phrase => "foo bar", :distance => "2"}
41
+ )
42
+ end
43
+
44
+ it "parses a paren grouping" do
45
+ should parse(%q((foo bar))).as(
46
+ {:group => [{:term => "foo"}, {:term => "bar"}]}
47
+ )
48
+ end
49
+
50
+ it "parses nested paren groups" do
51
+ should parse(%q((foo (bar (baz))))).as(
52
+ {:group => [
53
+ {:term => "foo"},
54
+ {:group => [
55
+ {:term => "bar"},
56
+ {:group => {:term => "baz"}}
57
+ ]}
58
+ ]}
59
+ )
60
+ end
61
+
62
+ it "parses a required term" do
63
+ should parse("+foo").as({:term => "foo", :required => "+"})
64
+ end
65
+
66
+ it "parses a prohibited term" do
67
+ should parse("-foo").as({:term => "foo", :prohibited => "-"})
68
+ end
69
+
70
+ it "parses prohibited groups and phrases" do
71
+ should parse(%q(+(foo bar) -"mumble stuff")).as [
72
+ {:group => [{:term => "foo"}, {:term => "bar"}], :required => "+"},
73
+ {:phrase => "mumble stuff", :prohibited => "-"}
74
+ ]
75
+ end
76
+
77
+ it "ignores leading spaces" do
78
+ should parse(" foo bar").as [{:term => "foo"}, {:term => "bar"}]
79
+ end
80
+
81
+ it "ignores trailing spaces" do
82
+ should parse("foo bar ").as [{:term => "foo"}, {:term => "bar"}]
83
+ end
84
+
85
+ it "ignores trailing spaces" do
86
+
87
+ end
88
+
89
+ it "parses AND groupings" do
90
+ should parse(%q(foo AND bar)).as [
91
+ {:term => "foo"},
92
+ {:op => "AND", :term => "bar"}
93
+ ]
94
+ end
95
+
96
+ it "parses a sequence of AND and OR" do
97
+ should parse(%q(foo AND bar OR baz OR mumble)).as [
98
+ {:term => "foo"},
99
+ {:op => "AND", :term => "bar"},
100
+ {:op => "OR", :term => "baz"},
101
+ {:op => "OR", :term => "mumble"}
102
+ ]
103
+ end
104
+
105
+ it "parses NOTs" do
106
+ should parse("foo NOT bar").as [
107
+ {:term => "foo"},
108
+ {:term => "bar", :op => "NOT"}
109
+ ]
110
+ end
111
+
112
+ it "parses field:value" do
113
+ should parse("title:foo").as(
114
+ {:field => "title", :term => "foo"}
115
+ )
116
+ end
117
+
118
+ it 'parses field:"a phrase"' do
119
+ should parse('title:"a phrase"').as(
120
+ {:field => "title", :phrase => "a phrase"}
121
+ )
122
+ end
123
+
124
+ it "parses field:(foo AND bar)" do
125
+ should parse('title:(foo AND bar)').as(
126
+ {:field => "title", :group => [
127
+ {:term => "foo"},
128
+ {:op => "AND", :term => "bar"}
129
+ ]}
130
+ )
131
+ end
132
+
133
+ it "parses fuzzy terms" do
134
+ should parse('fuzzy~').as(
135
+ {:term => "fuzzy", :similarity => nil}
136
+ )
137
+ end
138
+
139
+ it "parses a fuzzy similarity of 0" do
140
+ should parse('fuzzy~0').as(
141
+ {:term => "fuzzy", :similarity => "0"}
142
+ )
143
+ end
144
+
145
+ it "parses a fuzzy similarity of 1" do
146
+ should parse('fuzzy~1').as(
147
+ {:term => "fuzzy", :similarity => "1"}
148
+ )
149
+ end
150
+
151
+ it "parses a fuzzy similarity of 0.8" do
152
+ should parse('fuzzy~0.8').as(
153
+ {:term => "fuzzy", :similarity => "0.8"}
154
+ )
155
+ end
156
+
157
+ it { should parse('year:[2010 TO 2011]').as(
158
+ {:field => "year", :inclusive_range => {:from => "2010", :to => "2011"}}
159
+ ) }
160
+
161
+ it { should parse('year:{2009 TO 2012}').as(
162
+ {:field => "year", :exclusive_range => {:from => "2009", :to => "2012"}}
163
+ ) }
164
+
165
+ it { should parse('boosted^1').as({:term => "boosted", :boost => "1"})}
166
+ it { should parse('boosted^0.1').as({:term => "boosted", :boost => "0.1"})}
167
+
168
+ it { should parse('boosted^10 normal').as([
169
+ {:term => "boosted", :boost => "10"},
170
+ {:term => "normal"}
171
+ ])}
172
+
173
+ it { should parse('"boosted phrase"^10 "normal phrase"').as([
174
+ {:phrase => "boosted phrase", :boost => "10"},
175
+ {:phrase => "normal phrase"}
176
+ ])}
177
+
178
+ end
179
+
180
+ describe "#error_location" do
181
+ let(:parser) { LuceneQueryParser::Parser.new }
182
+
183
+ it "returns nil for a valid query" do
184
+ parser.error_location("valid query").should be_nil
185
+ end
186
+
187
+ it "returns a hash with the line and column for an invalid query" do
188
+ error = parser.error_location("invalid^ query")
189
+ error[:line].should == 1
190
+ error[:column].should == 8
191
+ error[:message].should =~ /Expected/
192
+ end
193
+ end
194
+
195
+ end
@@ -0,0 +1,6 @@
1
+ require "lucene_query_parser"
2
+
3
+ require "parslet/rig/rspec"
4
+
5
+ RSpec.configure do |config|
6
+ end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lucene_query_parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Nathan Witmer
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-08-24 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: parslet
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: rainbow
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 3
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ type: :runtime
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ~>
55
+ - !ruby/object:Gem::Version
56
+ hash: 27
57
+ segments:
58
+ - 2
59
+ - 5
60
+ - 0
61
+ version: 2.5.0
62
+ type: :development
63
+ version_requirements: *id003
64
+ description: Parser class and syntax checking script for validating Lucene queries
65
+ email:
66
+ - nwitmer@gmail.com
67
+ executables:
68
+ - check_lucene_query
69
+ extensions: []
70
+
71
+ extra_rdoc_files: []
72
+
73
+ files:
74
+ - .gitignore
75
+ - .rspec
76
+ - Gemfile
77
+ - Guardfile
78
+ - README.md
79
+ - Rakefile
80
+ - bin/check_lucene_query
81
+ - lib/lucene_query_parser.rb
82
+ - lib/lucene_query_parser/parser.rb
83
+ - lib/lucene_query_parser/version.rb
84
+ - lucene_query_parser.gemspec
85
+ - spec/lucene_query_parser/parser_spec.rb
86
+ - spec/spec_helper.rb
87
+ homepage: https://github.com/aniero/lucene_query_parser
88
+ licenses: []
89
+
90
+ post_install_message:
91
+ rdoc_options: []
92
+
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ hash: 3
101
+ segments:
102
+ - 0
103
+ version: "0"
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ hash: 3
110
+ segments:
111
+ - 0
112
+ version: "0"
113
+ requirements: []
114
+
115
+ rubyforge_project: lucene_query_parser
116
+ rubygems_version: 1.8.7
117
+ signing_key:
118
+ specification_version: 3
119
+ summary: Lucene query parser and syntax checker
120
+ test_files:
121
+ - spec/lucene_query_parser/parser_spec.rb
122
+ - spec/spec_helper.rb