textquery 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/examples/web.rb +49 -0
- data/lib/textquery/textquery.rb +28 -7
- data/lib/textquery/textquery_grammar.treetop +10 -10
- data/spec/textquery_spec.rb +15 -16
- metadata +4 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.1.1
|
data/examples/web.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "sinatra"
|
3
|
+
require "textquery"
|
4
|
+
require "erb"
|
5
|
+
|
6
|
+
$KCODE = 'u'
|
7
|
+
|
8
|
+
get "/" do
|
9
|
+
params['textstring'] = 'some random text'
|
10
|
+
params['query'] = 'some AND (text AND NOT (random OR match OR word))'
|
11
|
+
@result = TextQuery.new(params['query']).eval(params['textstring'])
|
12
|
+
|
13
|
+
erb :test
|
14
|
+
end
|
15
|
+
|
16
|
+
post "/" do
|
17
|
+
@result = TextQuery.new(params['query']).eval(params['textstring'])
|
18
|
+
erb :test
|
19
|
+
end
|
20
|
+
|
21
|
+
__END__
|
22
|
+
|
23
|
+
@@ test
|
24
|
+
<p>Result: <strong><%= @result ? "Matched" : "No match" %></strong></p>
|
25
|
+
|
26
|
+
<form action="/" method="post">
|
27
|
+
<label>Text</label><br />
|
28
|
+
<textarea name="textstring" cols="100" rows="6"><%= params['textstring'] %></textarea><br />
|
29
|
+
|
30
|
+
<label>Query</label><br />
|
31
|
+
<textarea name="query" cols="100" rows="2"><%= params['query'] %></textarea><br />
|
32
|
+
|
33
|
+
<br />
|
34
|
+
<input type="submit">
|
35
|
+
</form>
|
36
|
+
|
37
|
+
<pre style="background-color:#ccc; padding:2em;">
|
38
|
+
Supported operators and rules:
|
39
|
+
* AND (spaces are implicit AND’s)
|
40
|
+
* OR
|
41
|
+
* NOT ('-' is an alias)
|
42
|
+
* 'quoted strings'
|
43
|
+
|
44
|
+
Examples queries:
|
45
|
+
* 'to be' OR NOT 'to_be'
|
46
|
+
* -omitstring
|
47
|
+
* a AND b
|
48
|
+
* a AND (b AND NOT (c OR d))
|
49
|
+
</pre>
|
data/lib/textquery/textquery.rb
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
require 'treetop'
|
2
2
|
|
3
|
+
# make it utf-8 compatible
|
4
|
+
if RUBY_VERSION < '1.9'
|
5
|
+
require 'active_support'
|
6
|
+
$KCODE = 'u'
|
7
|
+
end
|
8
|
+
|
3
9
|
class WordMatch < Treetop::Runtime::SyntaxNode
|
4
|
-
def eval(text)
|
5
|
-
not text.match(
|
10
|
+
def eval(text, opt)
|
11
|
+
not text.match("^#{query}#{opt[:delim]}|#{opt[:delim]}#{query}#{opt[:delim]}|#{opt[:delim]}#{query}$|^#{query}$").nil?
|
6
12
|
end
|
7
13
|
|
8
14
|
def query
|
@@ -13,28 +19,43 @@ end
|
|
13
19
|
Treetop.load File.dirname(__FILE__) + "/textquery_grammar"
|
14
20
|
|
15
21
|
class TextQuery
|
16
|
-
def initialize(query = '')
|
22
|
+
def initialize(query = '', options = {})
|
17
23
|
@parser = TextQueryGrammarParser.new
|
18
24
|
@query = nil
|
19
25
|
|
26
|
+
update_options(options)
|
20
27
|
parse(query) if not query.empty?
|
21
28
|
end
|
22
29
|
|
23
30
|
def parse(query)
|
31
|
+
query = query.mb_chars if RUBY_VERSION < '1.9'
|
24
32
|
@query = @parser.parse(query)
|
25
33
|
if not @query
|
26
34
|
puts @parser.terminal_failures.join("\n")
|
27
35
|
end
|
28
|
-
|
36
|
+
self
|
29
37
|
end
|
30
38
|
|
31
|
-
def eval(input)
|
39
|
+
def eval(input, options = {})
|
40
|
+
update_options(options) if not options.empty?
|
41
|
+
|
32
42
|
if @query
|
33
|
-
@query.eval(input)
|
43
|
+
@query.eval(input, @options)
|
34
44
|
else
|
35
45
|
puts 'no query specified'
|
36
46
|
end
|
37
47
|
end
|
38
48
|
alias :match? :eval
|
39
|
-
|
49
|
+
|
50
|
+
def terminal_failures
|
51
|
+
@parser.terminal_failures
|
52
|
+
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
def update_options(options)
|
57
|
+
@options = {:delim => ' '}.merge options
|
58
|
+
@options[:delim] = Regexp.escape @options[:delim]
|
59
|
+
end
|
60
|
+
|
40
61
|
end
|
@@ -6,14 +6,14 @@ grammar TextQueryGrammar
|
|
6
6
|
|
7
7
|
rule logical
|
8
8
|
op1:value space operator:binary space op2:expression {
|
9
|
-
def eval(text)
|
10
|
-
operator.eval(op1.eval(text), op2.eval(text))
|
9
|
+
def eval(text, opt)
|
10
|
+
operator.eval(op1.eval(text, opt), op2.eval(text, opt))
|
11
11
|
end
|
12
12
|
}
|
13
13
|
/
|
14
14
|
op1:value [\s]+ op2:expression {
|
15
|
-
def eval(text)
|
16
|
-
op1.eval(text) && op2.eval(text)
|
15
|
+
def eval(text, opt)
|
16
|
+
op1.eval(text, opt) && op2.eval(text, opt)
|
17
17
|
end
|
18
18
|
}
|
19
19
|
end
|
@@ -54,20 +54,20 @@ grammar TextQueryGrammar
|
|
54
54
|
|
55
55
|
rule value
|
56
56
|
'(' space expression space ')' {
|
57
|
-
def eval(text)
|
58
|
-
expression.eval(text)
|
57
|
+
def eval(text, opt)
|
58
|
+
expression.eval(text, opt)
|
59
59
|
end
|
60
60
|
}
|
61
61
|
/
|
62
62
|
operator:unary space value {
|
63
|
-
def eval(text)
|
64
|
-
operator.eval(value.eval(text))
|
63
|
+
def eval(text, opt)
|
64
|
+
operator.eval(value.eval(text, opt))
|
65
65
|
end
|
66
66
|
}
|
67
67
|
/
|
68
68
|
"'" space words space "'" {
|
69
|
-
def eval(text)
|
70
|
-
words.eval(text)
|
69
|
+
def eval(text, opt)
|
70
|
+
words.eval(text, opt)
|
71
71
|
end
|
72
72
|
}
|
73
73
|
/
|
data/spec/textquery_spec.rb
CHANGED
@@ -18,18 +18,7 @@ describe TextQuery do
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def parse(input)
|
21
|
-
|
22
|
-
unless result
|
23
|
-
puts @parser.terminal_failures.join("\n")
|
24
|
-
end
|
25
|
-
result
|
26
|
-
end
|
27
|
-
|
28
|
-
it "should accept any non space separated sequence" do
|
29
|
-
%w[query 123 text123 #tag $%*].each do |input|
|
30
|
-
@parser.parse(input).text_value.should == input
|
31
|
-
parse(input).eval(input).should be_true
|
32
|
-
end
|
21
|
+
@parser.parse(input)
|
33
22
|
end
|
34
23
|
|
35
24
|
it "should look for exact word boundary match" do
|
@@ -48,7 +37,7 @@ describe TextQuery do
|
|
48
37
|
parse("a AND b").eval("a b").should be_true
|
49
38
|
parse("a AND b").eval("a c b").should be_true
|
50
39
|
end
|
51
|
-
|
40
|
+
|
52
41
|
it "should accept logical OR" do
|
53
42
|
parse("a OR b").eval("c").should be_false
|
54
43
|
parse("a OR b").eval("a").should be_true
|
@@ -90,10 +79,10 @@ describe TextQuery do
|
|
90
79
|
parse("(a AND b) OR c").eval("a b c").should be_true
|
91
80
|
parse("(a AND b) OR c").eval("a b").should be_true
|
92
81
|
parse("(a AND b) OR c").eval("a c").should be_true
|
93
|
-
|
82
|
+
|
94
83
|
parse("(a AND b) OR c").eval("c").should be_true
|
95
84
|
parse("a AND (b OR c)").eval("c").should be_false
|
96
|
-
|
85
|
+
|
97
86
|
# for the win...
|
98
87
|
parse("a AND (b AND (c OR d))").eval("d a b").should be_true
|
99
88
|
end
|
@@ -116,7 +105,7 @@ describe TextQuery do
|
|
116
105
|
parse("a AND (b AND (c OR NOT d))").eval("a b d").should be_false
|
117
106
|
parse("a AND (b AND (c OR NOT d))").eval("a b c").should be_true
|
118
107
|
parse("a AND (b AND (c OR NOT d))").eval("a b e").should be_true
|
119
|
-
|
108
|
+
|
120
109
|
parse("a AND (b AND NOT (c OR d))").eval("a b").should be_true
|
121
110
|
parse("a AND (b AND NOT (c OR d))").eval("a b c").should be_false
|
122
111
|
parse("a AND (b AND NOT (c OR d))").eval("a b d").should be_false
|
@@ -169,6 +158,16 @@ describe TextQuery do
|
|
169
158
|
q.match?("b").should be_false
|
170
159
|
q.match?("a b cdefg").should be_true
|
171
160
|
q.eval("a b cdefg").should be_true
|
161
|
+
end
|
162
|
+
|
163
|
+
it "should work on CJK text" do
|
164
|
+
JP = "に入れるわけにはいかないので、プラグインの出力が同一であることでもって同一性を判定する"
|
165
|
+
|
166
|
+
q = TextQuery.new("に入".mb_chars, :delim => '')
|
167
|
+
q.eval(JP).should be_true
|
168
|
+
q.eval("けにはい").should be_false
|
172
169
|
|
170
|
+
q.parse("れるわ AND が同".mb_chars).eval(JP).should be_true
|
171
|
+
q.parse("れるわ AND NOT す".mb_chars).eval(JP).should be_false
|
173
172
|
end
|
174
173
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Grigorik
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-29 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -34,6 +34,7 @@ files:
|
|
34
34
|
- README.rdoc
|
35
35
|
- Rakefile
|
36
36
|
- VERSION
|
37
|
+
- examples/web.rb
|
37
38
|
- lib/textquery.rb
|
38
39
|
- lib/textquery/textquery.rb
|
39
40
|
- lib/textquery/textquery_grammar.treetop
|
@@ -68,3 +69,4 @@ specification_version: 3
|
|
68
69
|
summary: Evaluate any text against a collection of match rules
|
69
70
|
test_files:
|
70
71
|
- spec/textquery_spec.rb
|
72
|
+
- examples/web.rb
|