textquery 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/examples/web.rb +49 -0
- data/lib/textquery/textquery.rb +28 -7
- data/lib/textquery/textquery_grammar.treetop +10 -10
- data/spec/textquery_spec.rb +15 -16
- metadata +4 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.1.1
|
data/examples/web.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "sinatra"
|
3
|
+
require "textquery"
|
4
|
+
require "erb"
|
5
|
+
|
6
|
+
$KCODE = 'u'
|
7
|
+
|
8
|
+
get "/" do
|
9
|
+
params['textstring'] = 'some random text'
|
10
|
+
params['query'] = 'some AND (text AND NOT (random OR match OR word))'
|
11
|
+
@result = TextQuery.new(params['query']).eval(params['textstring'])
|
12
|
+
|
13
|
+
erb :test
|
14
|
+
end
|
15
|
+
|
16
|
+
post "/" do
|
17
|
+
@result = TextQuery.new(params['query']).eval(params['textstring'])
|
18
|
+
erb :test
|
19
|
+
end
|
20
|
+
|
21
|
+
__END__
|
22
|
+
|
23
|
+
@@ test
|
24
|
+
<p>Result: <strong><%= @result ? "Matched" : "No match" %></strong></p>
|
25
|
+
|
26
|
+
<form action="/" method="post">
|
27
|
+
<label>Text</label><br />
|
28
|
+
<textarea name="textstring" cols="100" rows="6"><%= params['textstring'] %></textarea><br />
|
29
|
+
|
30
|
+
<label>Query</label><br />
|
31
|
+
<textarea name="query" cols="100" rows="2"><%= params['query'] %></textarea><br />
|
32
|
+
|
33
|
+
<br />
|
34
|
+
<input type="submit">
|
35
|
+
</form>
|
36
|
+
|
37
|
+
<pre style="background-color:#ccc; padding:2em;">
|
38
|
+
Supported operators and rules:
|
39
|
+
* AND (spaces are implicit AND’s)
|
40
|
+
* OR
|
41
|
+
* NOT ('-' is an alias)
|
42
|
+
* 'quoted strings'
|
43
|
+
|
44
|
+
Examples queries:
|
45
|
+
* 'to be' OR NOT 'to_be'
|
46
|
+
* -omitstring
|
47
|
+
* a AND b
|
48
|
+
* a AND (b AND NOT (c OR d))
|
49
|
+
</pre>
|
data/lib/textquery/textquery.rb
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
require 'treetop'
|
2
2
|
|
3
|
+
# make it utf-8 compatible
|
4
|
+
if RUBY_VERSION < '1.9'
|
5
|
+
require 'active_support'
|
6
|
+
$KCODE = 'u'
|
7
|
+
end
|
8
|
+
|
3
9
|
class WordMatch < Treetop::Runtime::SyntaxNode
|
4
|
-
def eval(text)
|
5
|
-
not text.match(
|
10
|
+
def eval(text, opt)
|
11
|
+
not text.match("^#{query}#{opt[:delim]}|#{opt[:delim]}#{query}#{opt[:delim]}|#{opt[:delim]}#{query}$|^#{query}$").nil?
|
6
12
|
end
|
7
13
|
|
8
14
|
def query
|
@@ -13,28 +19,43 @@ end
|
|
13
19
|
Treetop.load File.dirname(__FILE__) + "/textquery_grammar"
|
14
20
|
|
15
21
|
class TextQuery
|
16
|
-
def initialize(query = '')
|
22
|
+
def initialize(query = '', options = {})
|
17
23
|
@parser = TextQueryGrammarParser.new
|
18
24
|
@query = nil
|
19
25
|
|
26
|
+
update_options(options)
|
20
27
|
parse(query) if not query.empty?
|
21
28
|
end
|
22
29
|
|
23
30
|
def parse(query)
|
31
|
+
query = query.mb_chars if RUBY_VERSION < '1.9'
|
24
32
|
@query = @parser.parse(query)
|
25
33
|
if not @query
|
26
34
|
puts @parser.terminal_failures.join("\n")
|
27
35
|
end
|
28
|
-
|
36
|
+
self
|
29
37
|
end
|
30
38
|
|
31
|
-
def eval(input)
|
39
|
+
def eval(input, options = {})
|
40
|
+
update_options(options) if not options.empty?
|
41
|
+
|
32
42
|
if @query
|
33
|
-
@query.eval(input)
|
43
|
+
@query.eval(input, @options)
|
34
44
|
else
|
35
45
|
puts 'no query specified'
|
36
46
|
end
|
37
47
|
end
|
38
48
|
alias :match? :eval
|
39
|
-
|
49
|
+
|
50
|
+
def terminal_failures
|
51
|
+
@parser.terminal_failures
|
52
|
+
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
def update_options(options)
|
57
|
+
@options = {:delim => ' '}.merge options
|
58
|
+
@options[:delim] = Regexp.escape @options[:delim]
|
59
|
+
end
|
60
|
+
|
40
61
|
end
|
@@ -6,14 +6,14 @@ grammar TextQueryGrammar
|
|
6
6
|
|
7
7
|
rule logical
|
8
8
|
op1:value space operator:binary space op2:expression {
|
9
|
-
def eval(text)
|
10
|
-
operator.eval(op1.eval(text), op2.eval(text))
|
9
|
+
def eval(text, opt)
|
10
|
+
operator.eval(op1.eval(text, opt), op2.eval(text, opt))
|
11
11
|
end
|
12
12
|
}
|
13
13
|
/
|
14
14
|
op1:value [\s]+ op2:expression {
|
15
|
-
def eval(text)
|
16
|
-
op1.eval(text) && op2.eval(text)
|
15
|
+
def eval(text, opt)
|
16
|
+
op1.eval(text, opt) && op2.eval(text, opt)
|
17
17
|
end
|
18
18
|
}
|
19
19
|
end
|
@@ -54,20 +54,20 @@ grammar TextQueryGrammar
|
|
54
54
|
|
55
55
|
rule value
|
56
56
|
'(' space expression space ')' {
|
57
|
-
def eval(text)
|
58
|
-
expression.eval(text)
|
57
|
+
def eval(text, opt)
|
58
|
+
expression.eval(text, opt)
|
59
59
|
end
|
60
60
|
}
|
61
61
|
/
|
62
62
|
operator:unary space value {
|
63
|
-
def eval(text)
|
64
|
-
operator.eval(value.eval(text))
|
63
|
+
def eval(text, opt)
|
64
|
+
operator.eval(value.eval(text, opt))
|
65
65
|
end
|
66
66
|
}
|
67
67
|
/
|
68
68
|
"'" space words space "'" {
|
69
|
-
def eval(text)
|
70
|
-
words.eval(text)
|
69
|
+
def eval(text, opt)
|
70
|
+
words.eval(text, opt)
|
71
71
|
end
|
72
72
|
}
|
73
73
|
/
|
data/spec/textquery_spec.rb
CHANGED
@@ -18,18 +18,7 @@ describe TextQuery do
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def parse(input)
|
21
|
-
|
22
|
-
unless result
|
23
|
-
puts @parser.terminal_failures.join("\n")
|
24
|
-
end
|
25
|
-
result
|
26
|
-
end
|
27
|
-
|
28
|
-
it "should accept any non space separated sequence" do
|
29
|
-
%w[query 123 text123 #tag $%*].each do |input|
|
30
|
-
@parser.parse(input).text_value.should == input
|
31
|
-
parse(input).eval(input).should be_true
|
32
|
-
end
|
21
|
+
@parser.parse(input)
|
33
22
|
end
|
34
23
|
|
35
24
|
it "should look for exact word boundary match" do
|
@@ -48,7 +37,7 @@ describe TextQuery do
|
|
48
37
|
parse("a AND b").eval("a b").should be_true
|
49
38
|
parse("a AND b").eval("a c b").should be_true
|
50
39
|
end
|
51
|
-
|
40
|
+
|
52
41
|
it "should accept logical OR" do
|
53
42
|
parse("a OR b").eval("c").should be_false
|
54
43
|
parse("a OR b").eval("a").should be_true
|
@@ -90,10 +79,10 @@ describe TextQuery do
|
|
90
79
|
parse("(a AND b) OR c").eval("a b c").should be_true
|
91
80
|
parse("(a AND b) OR c").eval("a b").should be_true
|
92
81
|
parse("(a AND b) OR c").eval("a c").should be_true
|
93
|
-
|
82
|
+
|
94
83
|
parse("(a AND b) OR c").eval("c").should be_true
|
95
84
|
parse("a AND (b OR c)").eval("c").should be_false
|
96
|
-
|
85
|
+
|
97
86
|
# for the win...
|
98
87
|
parse("a AND (b AND (c OR d))").eval("d a b").should be_true
|
99
88
|
end
|
@@ -116,7 +105,7 @@ describe TextQuery do
|
|
116
105
|
parse("a AND (b AND (c OR NOT d))").eval("a b d").should be_false
|
117
106
|
parse("a AND (b AND (c OR NOT d))").eval("a b c").should be_true
|
118
107
|
parse("a AND (b AND (c OR NOT d))").eval("a b e").should be_true
|
119
|
-
|
108
|
+
|
120
109
|
parse("a AND (b AND NOT (c OR d))").eval("a b").should be_true
|
121
110
|
parse("a AND (b AND NOT (c OR d))").eval("a b c").should be_false
|
122
111
|
parse("a AND (b AND NOT (c OR d))").eval("a b d").should be_false
|
@@ -169,6 +158,16 @@ describe TextQuery do
|
|
169
158
|
q.match?("b").should be_false
|
170
159
|
q.match?("a b cdefg").should be_true
|
171
160
|
q.eval("a b cdefg").should be_true
|
161
|
+
end
|
162
|
+
|
163
|
+
it "should work on CJK text" do
|
164
|
+
JP = "に入れるわけにはいかないので、プラグインの出力が同一であることでもって同一性を判定する"
|
165
|
+
|
166
|
+
q = TextQuery.new("に入".mb_chars, :delim => '')
|
167
|
+
q.eval(JP).should be_true
|
168
|
+
q.eval("けにはい").should be_false
|
172
169
|
|
170
|
+
q.parse("れるわ AND が同".mb_chars).eval(JP).should be_true
|
171
|
+
q.parse("れるわ AND NOT す".mb_chars).eval(JP).should be_false
|
173
172
|
end
|
174
173
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Grigorik
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-29 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -34,6 +34,7 @@ files:
|
|
34
34
|
- README.rdoc
|
35
35
|
- Rakefile
|
36
36
|
- VERSION
|
37
|
+
- examples/web.rb
|
37
38
|
- lib/textquery.rb
|
38
39
|
- lib/textquery/textquery.rb
|
39
40
|
- lib/textquery/textquery_grammar.treetop
|
@@ -68,3 +69,4 @@ specification_version: 3
|
|
68
69
|
summary: Evaluate any text against a collection of match rules
|
69
70
|
test_files:
|
70
71
|
- spec/textquery_spec.rb
|
72
|
+
- examples/web.rb
|