keyword_search 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Rakefile +16 -0
- data/lib/keyword_search.rb +6 -5
- data/lib/keyword_search/grammar.rb +1 -1
- data/lib/keyword_search/parser.rb +18 -19
- data/lib/keyword_search/tokenizer.rb +0 -105
- data/test/test_keyword_search.rb +12 -1
- metadata +2 -2
data/History.txt
CHANGED
data/Rakefile
CHANGED
@@ -23,4 +23,20 @@ task :rebuild_parser do
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
+
task :rebuild_lexer do
|
27
|
+
require 'dhaka'
|
28
|
+
lexer = Dhaka::Lexer.new(KeywordSearch::LexerSpec)
|
29
|
+
File.open('lib/keyword_search/lexer.rb', 'w') do |file|
|
30
|
+
file << lexer.compile_to_ruby_source_as('KeywordSearch::Lexer')
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
task :rebuild_lexer do
|
35
|
+
require 'dhaka'
|
36
|
+
lexer = Dhaka::Lexer.new(KeywordSearch::LexerSpec)
|
37
|
+
File.open('lib/keyword_search/lexer.rb', 'w') do |file|
|
38
|
+
file << lexer.compile_to_ruby_source_as('KeywordSearch::Lexer')
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
26
42
|
# vim: syntax=Ruby
|
data/lib/keyword_search.rb
CHANGED
@@ -1,20 +1,21 @@
|
|
1
1
|
require 'dhaka'
|
2
2
|
|
3
3
|
dirname = File.join(File.dirname(__FILE__), 'keyword_search')
|
4
|
-
%w|grammar
|
4
|
+
%w|grammar parser lexer_spec lexer evaluator definition|.each do |dependency|
|
5
5
|
require File.join(dirname, dependency)
|
6
6
|
end
|
7
7
|
|
8
8
|
module KeywordSearch
|
9
9
|
|
10
|
-
|
10
|
+
class ParseError < ::SyntaxError; end
|
11
|
+
|
12
|
+
VERSION = '1.2.0'
|
11
13
|
|
12
14
|
class << self
|
13
15
|
def search(input_string, definition=nil, &block)
|
14
16
|
@evaluator ||= Evaluator.new
|
15
17
|
definition ||= Definition.new(&block)
|
16
|
-
|
17
|
-
parse_result = Parser.parse(tokens)
|
18
|
+
parse_result = Parser.parse(Lexer.lex(input_string))
|
18
19
|
unless parse_result.has_error?
|
19
20
|
results = @evaluator.evaluate(parse_result.parse_tree)
|
20
21
|
results.each do |key, terms|
|
@@ -22,7 +23,7 @@ module KeywordSearch
|
|
22
23
|
end
|
23
24
|
results
|
24
25
|
else
|
25
|
-
{}
|
26
|
+
raise ParseError, "Unexpected token #{parse_result.unexpected_token.inspect}"
|
26
27
|
end
|
27
28
|
end
|
28
29
|
end
|
@@ -4,38 +4,37 @@ class KeywordSearch::Parser < Dhaka::CompiledParser
|
|
4
4
|
|
5
5
|
start_with 0
|
6
6
|
|
7
|
-
at_state(3) {
|
8
|
-
for_symbols("k", "_End_", "s") { reduce_with "one_pair" }
|
9
|
-
}
|
10
|
-
|
11
7
|
at_state(1) {
|
12
|
-
for_symbols("
|
8
|
+
for_symbols("_End_") { reduce_with "start" }
|
9
|
+
for_symbols("s") { shift_to 3 }
|
10
|
+
for_symbols("Pair") { shift_to 2 }
|
13
11
|
}
|
14
12
|
|
15
|
-
at_state(
|
16
|
-
for_symbols("
|
13
|
+
at_state(5) {
|
14
|
+
for_symbols("_End_", "s") { reduce_with "keyword_and_term" }
|
17
15
|
}
|
18
16
|
|
19
|
-
at_state(
|
20
|
-
for_symbols("s") { shift_to
|
21
|
-
for_symbols("_End_") { reduce_with "start" }
|
22
|
-
for_symbols("k") { shift_to 1 }
|
23
|
-
for_symbols("Pair") { shift_to 6 }
|
17
|
+
at_state(4) {
|
18
|
+
for_symbols("s") { shift_to 5 }
|
24
19
|
}
|
25
20
|
|
26
21
|
at_state(2) {
|
27
|
-
for_symbols("
|
22
|
+
for_symbols("_End_", "s") { reduce_with "multiple_pairs" }
|
28
23
|
}
|
29
24
|
|
30
25
|
at_state(0) {
|
31
|
-
for_symbols("
|
32
|
-
for_symbols("
|
33
|
-
for_symbols("
|
34
|
-
for_symbols("Pairs") { shift_to 5 }
|
26
|
+
for_symbols("Pair") { shift_to 6 }
|
27
|
+
for_symbols("s") { shift_to 3 }
|
28
|
+
for_symbols("Pairs") { shift_to 1 }
|
35
29
|
}
|
36
30
|
|
37
|
-
at_state(
|
38
|
-
for_symbols("
|
31
|
+
at_state(6) {
|
32
|
+
for_symbols("_End_", "s") { reduce_with "one_pair" }
|
33
|
+
}
|
34
|
+
|
35
|
+
at_state(3) {
|
36
|
+
for_symbols(":") { shift_to 4 }
|
37
|
+
for_symbols("_End_", "s") { reduce_with "default_keyword_term" }
|
39
38
|
}
|
40
39
|
|
41
40
|
end
|
@@ -1,105 +0,0 @@
|
|
1
|
-
module KeywordSearch
|
2
|
-
|
3
|
-
class Tokenizer < Dhaka::Tokenizer
|
4
|
-
|
5
|
-
def accumulator
|
6
|
-
@accumulator ||= ''
|
7
|
-
end
|
8
|
-
|
9
|
-
def accumulate(string)
|
10
|
-
accumulator << string
|
11
|
-
end
|
12
|
-
|
13
|
-
def clear_accumulator
|
14
|
-
@accumulator = ''
|
15
|
-
end
|
16
|
-
|
17
|
-
|
18
|
-
# TODO: Add further character support; this is just for initial release
|
19
|
-
letters = ('a'..'z').to_a + ('A'..'Z').to_a
|
20
|
-
numbers = ('0'..'9').to_a
|
21
|
-
extras = %w|_ - ' / \ [ ] { } 1 @ # $ % ^ & * ( ) . , ? < > |
|
22
|
-
printables = letters + numbers + extras
|
23
|
-
whitespace = [' ']
|
24
|
-
quotes = %w|' "|
|
25
|
-
keyword_separator = [':']
|
26
|
-
all_characters = keyword_separator + printables + whitespace + quotes
|
27
|
-
|
28
|
-
for_state :idle_state do
|
29
|
-
|
30
|
-
for_characters(printables) do
|
31
|
-
clear_accumulator
|
32
|
-
switch_to :unquoted_literal_state
|
33
|
-
end
|
34
|
-
|
35
|
-
for_characters(quotes) do
|
36
|
-
advance unless accumulator.empty?
|
37
|
-
clear_accumulator
|
38
|
-
case curr_char
|
39
|
-
when %<">
|
40
|
-
advance
|
41
|
-
switch_to :double_quoted_literal_state
|
42
|
-
when %<'>
|
43
|
-
advance
|
44
|
-
switch_to :single_quoted_literal_state
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
for_characters whitespace do
|
49
|
-
advance
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
|
54
|
-
for_state :unquoted_literal_state do
|
55
|
-
|
56
|
-
for_characters(printables) do
|
57
|
-
accumulate curr_char
|
58
|
-
advance
|
59
|
-
create_token('s', accumulator) unless curr_char
|
60
|
-
end
|
61
|
-
|
62
|
-
for_characters(keyword_separator) do
|
63
|
-
create_token 'k', accumulator
|
64
|
-
clear_accumulator
|
65
|
-
advance
|
66
|
-
switch_to :idle_state
|
67
|
-
end
|
68
|
-
|
69
|
-
for_characters(whitespace) do
|
70
|
-
create_token 's', accumulator
|
71
|
-
clear_accumulator
|
72
|
-
switch_to :idle_state
|
73
|
-
end
|
74
|
-
|
75
|
-
end
|
76
|
-
|
77
|
-
for_state :double_quoted_literal_state do
|
78
|
-
for_characters(all_characters - %w<">) do
|
79
|
-
accumulate curr_char
|
80
|
-
advance
|
81
|
-
end
|
82
|
-
for_characters %w<"> do
|
83
|
-
create_token 's', accumulator
|
84
|
-
clear_accumulator
|
85
|
-
advance
|
86
|
-
switch_to :idle_state
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
for_state :single_quoted_literal_state do
|
91
|
-
for_characters(all_characters - %w<'>) do
|
92
|
-
accumulate curr_char
|
93
|
-
advance
|
94
|
-
end
|
95
|
-
for_characters %w<'> do
|
96
|
-
create_token 's', accumulator
|
97
|
-
clear_accumulator
|
98
|
-
advance
|
99
|
-
switch_to :idle_state
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
end
|
104
|
-
|
105
|
-
end
|
data/test/test_keyword_search.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'test/unit'
|
2
|
-
require 'lib/keyword_search'
|
2
|
+
require File.dirname(__FILE__) + '/../lib/keyword_search'
|
3
3
|
|
4
4
|
class TestKeywordSearch < Test::Unit::TestCase
|
5
5
|
|
@@ -129,6 +129,17 @@ class TestKeywordSearch < Test::Unit::TestCase
|
|
129
129
|
assert_equal %<okay...>, result
|
130
130
|
end
|
131
131
|
|
132
|
+
def test_parse_error_results_in_exception
|
133
|
+
assert_raises(KeywordSearch::ParseError) do
|
134
|
+
KeywordSearch.search(%<we_do_not_allow:! or ::>) do |with|
|
135
|
+
with.default_keyword :text
|
136
|
+
with.keyword :text do |values|
|
137
|
+
result = values.first
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
132
143
|
end
|
133
144
|
|
134
145
|
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: keyword_search
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.1.1
|
7
|
-
date: 2007-
|
6
|
+
version: 1.2.0
|
7
|
+
date: 2007-05-09 00:00:00 -06:00
|
8
8
|
summary: Generic support for extracting GMail-style search keywords/values from strings
|
9
9
|
require_paths:
|
10
10
|
- lib
|