textquery 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,51 @@
1
+ = TextQuery
2
+
3
+ Does it match? When regular expressions are not enough, textquery is the answer. For
4
+ example, regular expressions cannot evaluate recursive rules and often result in
5
+ overly verbose and complicated expressions.
6
+
7
+ Textquery is a simple PEG grammar with support for:
8
+ - AND (spaces are implicit ANDs)
9
+ - OR
10
+ - NOT (- is an alias)
11
+ - 'quoted strings'
12
+
13
+ == Example
14
+
15
+ TextQuery.new("'to be' OR NOT 'to_be'").match?("to be") # => true
16
+
17
+ TextQuery.new("-test").match?("some string of text") # => true
18
+ TextQuery.new("NOT test").match?("some string of text") # => true
19
+
20
+ TextQuery.new("a AND b").match?("b a") # => true
21
+ TextQuery.new("a AND b").match?("a c") # => false
22
+
23
+ q = TextQuery.new("a AND (b AND NOT (c OR d))")
24
+ q.match?("d a b") # => false
25
+ q.match?("b") # => false
26
+ q.match?("a b cdefg") # => true
27
+
28
+ == License
29
+
30
+ (The MIT License)
31
+
32
+ Copyright (c) 2009 Ilya Grigorik
33
+
34
+ Permission is hereby granted, free of charge, to any person obtaining
35
+ a copy of this software and associated documentation files (the
36
+ 'Software'), to deal in the Software without restriction, including
37
+ without limitation the rights to use, copy, modify, merge, publish,
38
+ distribute, sublicense, and/or sell copies of the Software, and to
39
+ permit persons to whom the Software is furnished to do so, subject to
40
+ the following conditions:
41
+
42
+ The above copyright notice and this permission notice shall be
43
+ included in all copies or substantial portions of the Software.
44
+
45
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
46
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
47
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
48
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
49
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
50
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
51
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ require 'rake'
2
+
3
# Define the gem packaging and release tasks when Jeweler can be loaded;
# otherwise print an installation hint and continue without those tasks.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    # Identity
    spec.name              = "textquery"
    spec.rubyforge_project = "textquery"
    spec.homepage          = "http://github.com/igrigorik/textquery"

    # Author
    spec.authors = ["Ilya Grigorik"]
    spec.email   = "ilya@igvita.com"

    # The description simply reuses the summary text.
    spec.summary     = "Evaluate any text against a collection of match rules"
    spec.description = spec.summary

    # Runtime dependency: the grammar is compiled with Treetop.
    spec.add_dependency("treetop")
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,3 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
2
+
3
+ require "textquery/textquery.rb"
@@ -0,0 +1,40 @@
1
+ require 'treetop'
2
+
3
# Syntax node for a single search term: either a bare word or the contents
# of a quoted string. The grammar attaches this class to its +word+ and
# +words+ rules.
class WordMatch < Treetop::Runtime::SyntaxNode
  # Tests whether the term occurs in +text+ as a whole token.
  #
  # text - the String being searched.
  #
  # Returns true when the term is delimited on both sides by whitespace or a
  # line boundary (+^+ / +$+), false otherwise. Matching is case-sensitive.
  def eval(text)
    term = query
    !text.match(/^#{term}\s|\s#{term}\s|\s#{term}$|^#{term}$/).nil?
  end

  # The matched source text, trimmed and escaped for use inside a Regexp.
  #
  # The +words+ rule ([^\']+) is greedy, so any padding before the closing
  # quote ends up inside text_value. Stripping it lets "'to be '" behave like
  # "'to be'". Bare words never contain whitespace, so this is a no-op there.
  def query
    Regexp.escape(text_value.strip)
  end
end
12
+
13
+ Treetop.load File.dirname(__FILE__) + "/textquery_grammar"
14
+
15
# Public entry point: compiles a query string with the Treetop-generated
# TextQueryGrammarParser and evaluates the result against input text.
#
#   q = TextQuery.new("a AND NOT b")
#   q.match?("a c") # => true
class TextQuery
  # query - optional query String. nil or an empty string leaves the
  #         instance without a compiled query until #parse is called.
  def initialize(query = '')
    @parser = TextQueryGrammarParser.new
    @query  = nil

    # Guard against nil as well as '' so TextQuery.new(nil) does not raise.
    parse(query) if query && !query.empty?
  end

  # Compiles +query+ and stores the resulting syntax tree, replacing any
  # previously compiled query.
  #
  # Returns the syntax tree, or nil when the query does not parse. In that
  # case the parser's terminal failures are printed to stdout.
  def parse(query)
    @query = @parser.parse(query)
    puts terminal_failures.join("\n") unless @query
    @query
  end

  # Terminal failures reported by the underlying parser, so callers holding a
  # TextQuery can inspect why a parse was rejected.
  def terminal_failures
    @parser.terminal_failures
  end

  # Evaluates the compiled query against +input+.
  #
  # Returns whatever the syntax tree's eval returns for +input+. When no
  # query has been compiled, prints 'no query specified' and returns nil.
  def eval(input)
    if @query
      @query.eval(input)
    else
      puts 'no query specified'
    end
  end
  alias :match? :eval
end
@@ -0,0 +1,77 @@
1
# PEG grammar for text queries. Every node that can be evaluated exposes
# eval(text), which returns whether +text+ satisfies that part of the query.
grammar TextQueryGrammar

  # Entry point: a logical combination of values, or a single value.
  rule expression
    logical / value
  end

  # Combines a value with the rest of the expression. The right-hand side
  # recurses into +expression+, so "a AND b OR c" groups as "a AND (b OR c)".
  # The second alternative treats bare whitespace between values as AND.
  rule logical
    op1:value space operator:binary space op2:expression {
      def eval(text)
        operator.eval(op1.eval(text), op2.eval(text))
      end
    }
    /
    op1:value [\s]+ op2:expression {
      def eval(text)
        op1.eval(text) && op2.eval(text)
      end
    }
  end

  # Binary keyword operators. The negative lookahead rejects a keyword that
  # is merely the prefix of a longer word, so "a ORANGE" is read as the two
  # words "a" and "ORANGE" rather than "a OR ANGE". Whitespace, parentheses,
  # a quote or "-" may still follow the keyword directly.
  rule binary
    'AND' ![^\s\(\)\'\-] {
      def eval(a,b)
        a && b
      end
    }
    /
    'OR' ![^\s\(\)\'\-] {
      def eval(a,b)
        a || b
      end
    }
  end

  # Negation: "-" or the keyword NOT. As with the binary keywords, NOT must
  # not run straight into further word characters, so "NOTHING" stays a
  # single word instead of being read as "NOT HING".
  rule unary
    ('-' / 'NOT' ![^\s\(\)\'\-]) {
      def eval(a)
        not a
      end
    }
  end

  # Optional whitespace.
  rule space
    [\s]*
  end

  # A bare term: one or more characters that are not whitespace or parentheses.
  rule word
    [^\s\(\)]+ <WordMatch>
  end

  # The contents of a quoted string: everything up to the next single quote.
  rule words
    [^\']+ <WordMatch>
  end

  # A single operand: a parenthesised sub-expression, a negated value,
  # a quoted string, or a bare word.
  rule value
    '(' space expression space ')' {
      def eval(text)
        expression.eval(text)
      end
    }
    /
    operator:unary space value {
      def eval(text)
        operator.eval(value.eval(text))
      end
    }
    /
    "'" space words space "'" {
      def eval(text)
        words.eval(text)
      end
    }
    /
    word
  end

end
@@ -0,0 +1,174 @@
1
+ require "rubygems"
2
+ require "spec"
3
+ require "pp"
4
+
5
+ require "lib/textquery"
6
+
7
+ # Resources:
8
+ # - http://github.com/nathansobo/treetop
9
+ # - http://github.com/nathansobo/treetop/blob/master/examples/lambda_calculus/arithmetic.treetop
10
+ # - http://snippets.dzone.com/tag/Treetop
11
+ # - http://treetop.rubyforge.org/index.html
12
+ # - http://en.wikipedia.org/wiki/Parsing_expression_grammar
13
+ #
14
+
15
# Behavioural specification for TextQuery: word matching, the AND / OR / NOT
# operators, grouping with parentheses, quoted strings and implicit ANDs.
describe TextQuery do
  before(:all) do
    @parser = TextQuery.new
  end

  # Compiles +input+ with the shared TextQuery instance and returns the
  # resulting syntax tree (nil when the query does not parse).
  #
  # TextQuery#parse already prints the parser's terminal failures when a
  # query is rejected, and TextQuery itself does not define
  # terminal_failures, so the helper only hands back the result.
  def parse(input)
    @parser.parse(input)
  end

  it "should accept any non space separated sequence" do
    %w[query 123 text123 #tag $%*].each do |input|
      @parser.parse(input).text_value.should == input
      parse(input).eval(input).should be_true
    end
  end

  it "should look for exact word boundary match" do
    parse("text").eval("textstring").should be_false
    parse("text").eval("stringtext").should be_false
    parse("text").eval("some textstring").should be_false
    parse("text").eval("string of texts stuff").should be_false
    parse("$^").eval("string of $^* stuff").should be_false
  end

  it "should accept logical AND" do
    parse("a AND b").eval("c").should be_false
    parse("a AND b").eval("a").should be_false
    parse("a AND b").eval("b").should be_false

    parse("a AND b").eval("a b").should be_true
    parse("a AND b").eval("a c b").should be_true
  end

  it "should accept logical OR" do
    parse("a OR b").eval("c").should be_false
    parse("a OR b").eval("a").should be_true
    parse("a OR b").eval("b").should be_true

    parse("a OR b").eval("a b").should be_true
    parse("a OR b").eval("a c b").should be_true
  end

  it "should give precedence to AND" do
    # The grammar recurses on the right-hand operand, so an unparenthesised
    # "a AND b OR c" is evaluated as "a AND (b OR c)".
    parse("a AND b OR c").eval("a b c").should be_true
    parse("a AND b OR c").eval("a b").should be_true
    parse("a AND b OR c").eval("a c").should be_true

    parse("a AND b OR c").eval("b c").should be_false
    parse("a AND b OR c").eval("c").should be_false
    parse("a AND b OR c").eval("b").should be_false
  end

  it "should accept logical NOT" do
    %w[- NOT].each do |operator|
      parse("#{operator} a").eval("a").should be_false
      parse("#{operator} #{operator} a").eval("a").should be_true

      parse("#{operator} a OR a").eval("a").should be_true
      parse("a OR #{operator} a").eval("a").should be_true

      parse("b AND #{operator} a").eval("b").should be_true
      parse("b AND #{operator} a").eval("a").should be_false
    end
  end

  it "should evaluate sub expressions" do
    parse("(a AND b)").eval("a b").should be_true
    parse("(a OR b)").eval("a b").should be_true
    parse("(a AND NOT b)").eval("a b").should be_false

    parse("(a AND b) OR c").eval("a b c").should be_true
    parse("(a AND b) OR c").eval("a b").should be_true
    parse("(a AND b) OR c").eval("a c").should be_true

    parse("(a AND b) OR c").eval("c").should be_true
    parse("a AND (b OR c)").eval("c").should be_false

    # for the win...
    parse("a AND (b AND (c OR d))").eval("d a b").should be_true
  end

  it "should not trip up on placement of brackets" do
    parse("a AND (-b)").eval("a b").should == parse("a AND -(b)").eval("a b")
    parse("(-a) AND b").eval("a b").should == parse("-(a) AND b").eval("a b")
    parse("-(a) AND -(b)").eval("a b").should == parse("(-a) AND (-b)").eval("a b")

    parse("a OR (-b)").eval("a b").should == parse("a OR -(b)").eval("a b")
    parse("(-a) OR b").eval("a b").should == parse("-(a) OR b").eval("a b")
    parse("(-a) OR (-b)").eval("a b").should == parse("-(a) OR -(b)").eval("a b")

    parse("a AND (b OR c)").eval("a b c").should be_true
    parse("a AND (b OR c)").eval("a b").should be_true
    parse("a AND (b OR c)").eval("a c").should be_true

    parse("(NOT a) OR a").eval("a").should be_true
    parse("(NOT a) AND (NOT b) AND (NOT c)").eval("b").should be_false
    parse("a AND (b AND (c OR NOT d))").eval("a b d").should be_false
    parse("a AND (b AND (c OR NOT d))").eval("a b c").should be_true
    parse("a AND (b AND (c OR NOT d))").eval("a b e").should be_true

    parse("a AND (b AND NOT (c OR d))").eval("a b").should be_true
    parse("a AND (b AND NOT (c OR d))").eval("a b c").should be_false
    parse("a AND (b AND NOT (c OR d))").eval("a b d").should be_false

    parse("-a AND -b AND -c").eval("e").should be_true
    parse("(-a) AND (-b) AND (-c)").eval("e").should be_true
    parse("(NOT a) AND (NOT b) AND (NOT c)").eval("e").should be_true
    parse("NOT a AND NOT b AND NOT c").eval("e").should be_true
  end

  it "should accept quoted strings" do
    parse("'some text'").eval("some text").should be_true
    parse("'some text string'").eval("some text").should be_false

    parse("'some text string'").eval("some text 1 string").should be_false
    parse("-'some text string'").eval("some text 1 string").should be_true

    parse("a AND -'a b'").eval("a b c").should be_false
    parse("a AND -'a b'").eval("a c b").should be_true

    parse("(a OR b) AND (-'a b c')").eval("a b c").should be_false
    parse("(a OR b) AND (-'a b c')").eval("a c b").should be_true
    parse("(a AND b) AND (-'a b c')").eval("a c b").should be_true

    # shakespeare got nothin' on ruby...
    parse("'to be' OR NOT 'to be'").eval("to be").should be_true
  end

  it "should treat spaces as implicit ANDs" do
    parse("a b").eval("a c b").should be_true
    parse("b a c").eval("a c b").should be_true
    parse("b a c").eval("a c").should be_false

    parse("some text AND 'exact match'").eval("some exact match text").should be_true
    parse("some text AND 'exact match'").eval("some exact text match").should be_false

    parse("some text AND -'exact match'").eval("some exact text match").should be_true
    parse("some text AND -'exact match'").eval("some exact match").should be_false
  end

  it "should wrap the grammar API" do
    TextQuery.new("'to be' OR NOT 'to_be'").match?("to be").should be_true
    TextQuery.new("-test").match?("some string of text").should be_true
    TextQuery.new("NOT test").match?("some string of text").should be_true
    TextQuery.new("a AND b").match?("b a").should be_true
    TextQuery.new("a AND b").match?("a c").should be_false

    q = TextQuery.new("a AND (b AND NOT (c OR d))")
    q.match?("d a b").should be_false
    q.match?("b").should be_false
    q.match?("a b cdefg").should be_true
    q.eval("a b cdefg").should be_true
  end
end
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: textquery
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ilya Grigorik
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-28 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: treetop
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: Evaluate any text against a collection of match rules
26
+ email: ilya@igvita.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.rdoc
33
+ files:
34
+ - README.rdoc
35
+ - Rakefile
36
+ - VERSION
37
+ - lib/textquery.rb
38
+ - lib/textquery/textquery.rb
39
+ - lib/textquery/textquery_grammar.treetop
40
+ - spec/textquery_spec.rb
41
+ has_rdoc: true
42
+ homepage: http://github.com/igrigorik/textquery
43
+ licenses: []
44
+
45
+ post_install_message:
46
+ rdoc_options:
47
+ - --charset=UTF-8
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "0"
55
+ version:
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ requirements: []
63
+
64
+ rubyforge_project: textquery
65
+ rubygems_version: 1.3.5
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Evaluate any text against a collection of match rules
69
+ test_files:
70
+ - spec/textquery_spec.rb