textquery 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +51 -0
- data/Rakefile +19 -0
- data/VERSION +1 -0
- data/lib/textquery.rb +3 -0
- data/lib/textquery/textquery.rb +40 -0
- data/lib/textquery/textquery_grammar.treetop +77 -0
- data/spec/textquery_spec.rb +174 -0
- metadata +70 -0
data/README.rdoc
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
= TextQuery
|
2
|
+
|
3
|
+
Does it match? When regular expressions are not enough, textquery is the answer. For
|
4
|
+
example, regular expressions cannot evaluate recursive rules and often result in
|
5
|
+
overly verbose and complicated expressions.
|
6
|
+
|
7
|
+
Textquery is a simple PEG grammar with support for:
|
8
|
+
- AND (spaces are implicit AND's)
|
9
|
+
- OR
|
10
|
+
- NOT (- is an alias)
|
11
|
+
- 'quoted strings'
|
12
|
+
|
13
|
+
== Example
|
14
|
+
|
15
|
+
TextQuery.new("'to be' OR NOT 'to be'").match?("to be") # => true
|
16
|
+
|
17
|
+
TextQuery.new("-test").match?("some string of text") # => true
|
18
|
+
TextQuery.new("NOT test").match?("some string of text") # => true
|
19
|
+
|
20
|
+
TextQuery.new("a AND b").match?("b a") # => true
|
21
|
+
TextQuery.new("a AND b").match?("a c") # => false
|
22
|
+
|
23
|
+
q = TextQuery.new("a AND (b AND NOT (c OR d))")
|
24
|
+
q.match?("d a b") # => false
|
25
|
+
q.match?("b") # => false
|
26
|
+
q.match?("a b cdefg") # => true
|
27
|
+
|
28
|
+
== License
|
29
|
+
|
30
|
+
(The MIT License)
|
31
|
+
|
32
|
+
Copyright (c) 2009 Ilya Grigorik
|
33
|
+
|
34
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
35
|
+
a copy of this software and associated documentation files (the
|
36
|
+
'Software'), to deal in the Software without restriction, including
|
37
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
38
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
39
|
+
permit persons to whom the Software is furnished to do so, subject to
|
40
|
+
the following conditions:
|
41
|
+
|
42
|
+
The above copyright notice and this permission notice shall be
|
43
|
+
included in all copies or substantial portions of the Software.
|
44
|
+
|
45
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
46
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
47
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
48
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
49
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
50
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
51
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
# Gem packaging tasks are optional: they are only set up when the jeweler gem
# can be loaded. If it cannot, a hint is printed and loading continues.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    spec.name              = "textquery"
    spec.rubyforge_project = "textquery"

    # The long description simply reuses the one-line summary.
    spec.summary     = "Evaluate any text against a collection of match rules"
    spec.description = spec.summary

    spec.authors  = ["Ilya Grigorik"]
    spec.email    = "ilya@igvita.com"
    spec.homepage = "http://github.com/igrigorik/textquery"

    spec.add_dependency "treetop"
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/lib/textquery.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'treetop'
|
2
|
+
|
3
|
+
# Syntax node attached to the grammar's `word` and `words` rules. It answers
# whether the matched query text occurs in an input string as a standalone,
# whitespace-delimited term.
class WordMatch < Treetop::Runtime::SyntaxNode
  # The text matched by this node, escaped so that it is treated literally
  # when interpolated into a regular expression.
  def query
    Regexp.escape(text_value)
  end

  # Returns true when +text+ contains the query term preceded by the start of
  # a line or whitespace, and followed by whitespace or the end of a line;
  # returns false otherwise (so "text" does not match "textstring").
  def eval(text)
    term = query
    found = text.match(/(?:^|\s)#{term}(?:\s|$)/)
    !found.nil?
  end
end
|
12
|
+
|
13
|
+
Treetop.load File.dirname(__FILE__) + "/textquery_grammar"
|
14
|
+
|
15
|
+
# Wraps the generated TextQueryGrammarParser: a query string is parsed once
# into a syntax tree, which can then be evaluated against any number of
# input strings.
class TextQuery
  # query - the query string to compile. An empty string or nil means
  #         "no query yet"; one can be supplied later through #parse.
  def initialize(query = '')
    @parser = TextQueryGrammarParser.new
    @query  = nil

    # Guard against nil as well as '' so TextQuery.new(nil) does not blow up
    # with NoMethodError on nil.empty?.
    parse(query) unless query.nil? || query.empty?
  end

  # Parses +query+ and stores the resulting syntax tree, replacing any
  # previously parsed query.
  #
  # Returns the root syntax node, or nil when the query cannot be parsed (in
  # which case the parser's terminal failures are printed to stdout).
  def parse(query)
    @query = @parser.parse(query)
    puts @parser.terminal_failures.join("\n") unless @query
    @query
  end

  # Evaluates the stored query against +input+.
  #
  # Returns the result of the syntax tree's eval. When no query has been
  # parsed successfully, prints a notice to stdout and returns nil.
  def eval(input)
    if @query
      @query.eval(input)
    else
      puts 'no query specified'
    end
  end
  alias :match? :eval
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# PEG grammar for boolean text queries. Every node produced by a successful
# parse responds to eval(text) and reports whether the text satisfies that
# part of the query.
grammar TextQueryGrammar

  # Entry point: a logical combination of values, or a single value.
  rule expression
    logical / value
  end

  # Combines a value with the rest of the expression. The right-hand side is
  # itself an `expression`, so operators associate to the right:
  # "a AND b OR c" is evaluated as "a AND (b OR c)".
  rule logical
    # explicit operator: <value> AND|OR <expression>
    op1:value space operator:binary space op2:expression {
      def eval(text)
        operator.eval(op1.eval(text), op2.eval(text))
      end
    }
    /
    # no operator: whitespace between two terms is an implicit AND
    op1:value [\s]+ op2:expression {
      def eval(text)
        op1.eval(text) && op2.eval(text)
      end
    }
  end

  # Binary operator keywords. The lookahead requires the keyword to be
  # followed by whitespace, '(', a quote or '-', so that a word which merely
  # starts with the keyword ("ORANGE", "ANDROID") is not split into an
  # operator plus the remainder of the word.
  rule binary
    'AND' &[\s\(\'\-] {
      def eval(a,b)
        a && b
      end
    }
    /
    'OR' &[\s\(\'\-] {
      def eval(a,b)
        a || b
      end
    }
  end

  # Negation. '-' is a prefix alias and may be attached directly to its
  # operand ("-test"); the NOT keyword needs the same boundary as the binary
  # operators so that words such as "NOTE" are not parsed as "NOT E".
  rule unary
    ('-' / 'NOT' &[\s\(\'\-]) {
      def eval(a)
        not a
      end
    }
  end

  # Optional whitespace.
  rule space
    [\s]*
  end

  # A bare term: any run of characters other than whitespace and parentheses.
  rule word
    [^\s\(\)]+ <WordMatch>
  end

  # The contents of a quoted phrase: anything up to the next single quote.
  rule words
    [^\']+ <WordMatch>
  end

  # A single operand: a parenthesised sub-expression, a negated value, a
  # quoted phrase, or a bare word. Alternatives are tried in this order.
  rule value
    '(' space expression space ')' {
      def eval(text)
        expression.eval(text)
      end
    }
    /
    operator:unary space value {
      def eval(text)
        operator.eval(value.eval(text))
      end
    }
    /
    "'" space words space "'" {
      def eval(text)
        words.eval(text)
      end
    }
    /
    word
  end

end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "spec"
|
3
|
+
require "pp"
|
4
|
+
|
5
|
+
require "lib/textquery"
|
6
|
+
|
7
|
+
# Resources:
|
8
|
+
# - http://github.com/nathansobo/treetop
|
9
|
+
# - http://github.com/nathansobo/treetop/blob/master/examples/lambda_calculus/arithmetic.treetop
|
10
|
+
# - http://snippets.dzone.com/tag/Treetop
|
11
|
+
# - http://treetop.rubyforge.org/index.html
|
12
|
+
# - http://en.wikipedia.org/wiki/Parsing_expression_grammar
|
13
|
+
#
|
14
|
+
|
15
|
+
describe TextQuery do
  # One TextQuery instance is shared by all examples; each call to #parse
  # replaces the query it holds.
  before(:all) do
    @parser = TextQuery.new
  end

  # Parses +input+ and returns the root syntax node.
  #
  # Raises ArgumentError when the query cannot be parsed, so a bad query
  # fails the example with a clear message. TextQuery#parse has already
  # printed the parser's terminal failures at that point, and TextQuery
  # itself does not expose #terminal_failures.
  def parse(input)
    result = @parser.parse(input)
    raise ArgumentError, "unable to parse query: #{input.inspect}" unless result
    result
  end

  it "should accept any non space separated sequence" do
    %w[query 123 text123 #tag $%*].each do |input|
      @parser.parse(input).text_value.should == input
      parse(input).eval(input).should be_true
    end
  end

  it "should look for exact word boundary match" do
    parse("text").eval("textstring").should be_false
    parse("text").eval("stringtext").should be_false
    parse("text").eval("some textstring").should be_false
    parse("text").eval("string of texts stuff").should be_false
    parse("$^").eval("string of $^* stuff").should be_false
  end

  it "should accept logical AND" do
    parse("a AND b").eval("c").should be_false
    parse("a AND b").eval("a").should be_false
    parse("a AND b").eval("b").should be_false

    parse("a AND b").eval("a b").should be_true
    parse("a AND b").eval("a c b").should be_true
  end

  it "should accept logical OR" do
    parse("a OR b").eval("c").should be_false
    parse("a OR b").eval("a").should be_true
    parse("a OR b").eval("b").should be_true

    parse("a OR b").eval("a b").should be_true
    parse("a OR b").eval("a c b").should be_true
  end

  it "should give precedence to AND" do
    # Operators group to the right: "a AND b OR c" is evaluated as
    # "a AND (b OR c)", so "a" is always required.
    parse("a AND b OR c").eval("a b c").should be_true
    parse("a AND b OR c").eval("a b").should be_true
    parse("a AND b OR c").eval("a c").should be_true

    parse("a AND b OR c").eval("b c").should be_false
    parse("a AND b OR c").eval("c").should be_false
    parse("a AND b OR c").eval("b").should be_false
  end

  it "should accept logical NOT" do
    %w[- NOT].each do |operator|
      parse("#{operator} a").eval("a").should be_false
      parse("#{operator} #{operator} a").eval("a").should be_true

      parse("#{operator} a OR a").eval("a").should be_true
      parse("a OR #{operator} a").eval("a").should be_true

      parse("b AND #{operator} a").eval("b").should be_true
      parse("b AND #{operator} a").eval("a").should be_false
    end
  end

  it "should evaluate sub expressions" do
    parse("(a AND b)").eval("a b").should be_true
    parse("(a OR b)").eval("a b").should be_true
    parse("(a AND NOT b)").eval("a b").should be_false

    parse("(a AND b) OR c").eval("a b c").should be_true
    parse("(a AND b) OR c").eval("a b").should be_true
    parse("(a AND b) OR c").eval("a c").should be_true

    parse("(a AND b) OR c").eval("c").should be_true
    parse("a AND (b OR c)").eval("c").should be_false

    # for the win...
    parse("a AND (b AND (c OR d))").eval("d a b").should be_true
  end

  it "should not trip up on placement of brackets" do
    parse("a AND (-b)").eval("a b").should == parse("a AND -(b)").eval("a b")
    parse("(-a) AND b").eval("a b").should == parse("-(a) AND b").eval("a b")
    parse("-(a) AND -(b)").eval("a b").should == parse("(-a) AND (-b)").eval("a b")

    parse("a OR (-b)").eval("a b").should == parse("a OR -(b)").eval("a b")
    parse("(-a) OR b").eval("a b").should == parse("-(a) OR b").eval("a b")
    parse("(-a) OR (-b)").eval("a b").should == parse("-(a) OR -(b)").eval("a b")

    parse("a AND (b OR c)").eval("a b c").should be_true
    parse("a AND (b OR c)").eval("a b").should be_true
    parse("a AND (b OR c)").eval("a c").should be_true

    parse("(NOT a) OR a").eval("a").should be_true
    parse("(NOT a) AND (NOT b) AND (NOT c)").eval("b").should be_false
    parse("a AND (b AND (c OR NOT d))").eval("a b d").should be_false
    parse("a AND (b AND (c OR NOT d))").eval("a b c").should be_true
    parse("a AND (b AND (c OR NOT d))").eval("a b e").should be_true

    parse("a AND (b AND NOT (c OR d))").eval("a b").should be_true
    parse("a AND (b AND NOT (c OR d))").eval("a b c").should be_false
    parse("a AND (b AND NOT (c OR d))").eval("a b d").should be_false

    parse("-a AND -b AND -c").eval("e").should be_true
    parse("(-a) AND (-b) AND (-c)").eval("e").should be_true
    parse("(NOT a) AND (NOT b) AND (NOT c)").eval("e").should be_true
    parse("NOT a AND NOT b AND NOT c").eval("e").should be_true
  end

  it "should accept quoted strings" do
    parse("'some text'").eval("some text").should be_true
    parse("'some text string'").eval("some text").should be_false

    parse("'some text string'").eval("some text 1 string").should be_false
    parse("-'some text string'").eval("some text 1 string").should be_true

    parse("a AND -'a b'").eval("a b c").should be_false
    parse("a AND -'a b'").eval("a c b").should be_true

    parse("(a OR b) AND (-'a b c')").eval("a b c").should be_false
    parse("(a OR b) AND (-'a b c')").eval("a c b").should be_true
    parse("(a AND b) AND (-'a b c')").eval("a c b").should be_true

    # shakespeare got nothin' on ruby...
    parse("'to be' OR NOT 'to be'").eval("to be").should be_true
  end

  it "should treat spaces as implicit ANDs" do
    parse("a b").eval("a c b").should be_true
    parse("b a c").eval("a c b").should be_true
    parse("b a c").eval("a c").should be_false

    parse("some text AND 'exact match'").eval("some exact match text").should be_true
    parse("some text AND 'exact match'").eval("some exact text match").should be_false

    parse("some text AND -'exact match'").eval("some exact text match").should be_true
    parse("some text AND -'exact match'").eval("some exact match").should be_false
  end

  it "should wrap the grammar API" do
    TextQuery.new("'to be' OR NOT 'to_be'").match?("to be").should be_true
    TextQuery.new("-test").match?("some string of text").should be_true
    TextQuery.new("NOT test").match?("some string of text").should be_true
    TextQuery.new("a AND b").match?("b a").should be_true
    TextQuery.new("a AND b").match?("a c").should be_false

    q = TextQuery.new("a AND (b AND NOT (c OR d))")
    q.match?("d a b").should be_false
    q.match?("b").should be_false
    q.match?("a b cdefg").should be_true
    q.eval("a b cdefg").should be_true
  end
end
|
metadata
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: textquery
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ilya Grigorik
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-28 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: treetop
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
description: Evaluate any text against a collection of match rules
|
26
|
+
email: ilya@igvita.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- README.rdoc
|
33
|
+
files:
|
34
|
+
- README.rdoc
|
35
|
+
- Rakefile
|
36
|
+
- VERSION
|
37
|
+
- lib/textquery.rb
|
38
|
+
- lib/textquery/textquery.rb
|
39
|
+
- lib/textquery/textquery_grammar.treetop
|
40
|
+
- spec/textquery_spec.rb
|
41
|
+
has_rdoc: true
|
42
|
+
homepage: http://github.com/igrigorik/textquery
|
43
|
+
licenses: []
|
44
|
+
|
45
|
+
post_install_message:
|
46
|
+
rdoc_options:
|
47
|
+
- --charset=UTF-8
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
version:
|
62
|
+
requirements: []
|
63
|
+
|
64
|
+
rubyforge_project: textquery
|
65
|
+
rubygems_version: 1.3.5
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: Evaluate any text against a collection of match rules
|
69
|
+
test_files:
|
70
|
+
- spec/textquery_spec.rb
|