keyword_search 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Rakefile +16 -0
- data/lib/keyword_search.rb +6 -5
- data/lib/keyword_search/grammar.rb +1 -1
- data/lib/keyword_search/parser.rb +18 -19
- data/lib/keyword_search/tokenizer.rb +0 -105
- data/test/test_keyword_search.rb +12 -1
- metadata +2 -2
data/History.txt
CHANGED
data/Rakefile
CHANGED
@@ -23,4 +23,20 @@ task :rebuild_parser do
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
+
task :rebuild_lexer do
|
27
|
+
require 'dhaka'
|
28
|
+
lexer = Dhaka::Lexer.new(KeywordSearch::LexerSpec)
|
29
|
+
File.open('lib/keyword_search/lexer.rb', 'w') do |file|
|
30
|
+
file << lexer.compile_to_ruby_source_as('KeywordSearch::Lexer')
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
task :rebuild_lexer do
|
35
|
+
require 'dhaka'
|
36
|
+
lexer = Dhaka::Lexer.new(KeywordSearch::LexerSpec)
|
37
|
+
File.open('lib/keyword_search/lexer.rb', 'w') do |file|
|
38
|
+
file << lexer.compile_to_ruby_source_as('KeywordSearch::Lexer')
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
26
42
|
# vim: syntax=Ruby
|
data/lib/keyword_search.rb
CHANGED
@@ -1,20 +1,21 @@
|
|
1
1
|
require 'dhaka'
|
2
2
|
|
3
3
|
dirname = File.join(File.dirname(__FILE__), 'keyword_search')
|
4
|
-
%w|grammar
|
4
|
+
%w|grammar parser lexer_spec lexer evaluator definition|.each do |dependency|
|
5
5
|
require File.join(dirname, dependency)
|
6
6
|
end
|
7
7
|
|
8
8
|
module KeywordSearch
|
9
9
|
|
10
|
-
|
10
|
+
class ParseError < ::SyntaxError; end
|
11
|
+
|
12
|
+
VERSION = '1.2.0'
|
11
13
|
|
12
14
|
class << self
|
13
15
|
def search(input_string, definition=nil, &block)
|
14
16
|
@evaluator ||= Evaluator.new
|
15
17
|
definition ||= Definition.new(&block)
|
16
|
-
|
17
|
-
parse_result = Parser.parse(tokens)
|
18
|
+
parse_result = Parser.parse(Lexer.lex(input_string))
|
18
19
|
unless parse_result.has_error?
|
19
20
|
results = @evaluator.evaluate(parse_result.parse_tree)
|
20
21
|
results.each do |key, terms|
|
@@ -22,7 +23,7 @@ module KeywordSearch
|
|
22
23
|
end
|
23
24
|
results
|
24
25
|
else
|
25
|
-
{}
|
26
|
+
raise ParseError, "Unexpected token #{parse_result.unexpected_token.inspect}"
|
26
27
|
end
|
27
28
|
end
|
28
29
|
end
|
@@ -4,38 +4,37 @@ class KeywordSearch::Parser < Dhaka::CompiledParser
|
|
4
4
|
|
5
5
|
start_with 0
|
6
6
|
|
7
|
-
at_state(3) {
|
8
|
-
for_symbols("k", "_End_", "s") { reduce_with "one_pair" }
|
9
|
-
}
|
10
|
-
|
11
7
|
at_state(1) {
|
12
|
-
for_symbols("
|
8
|
+
for_symbols("_End_") { reduce_with "start" }
|
9
|
+
for_symbols("s") { shift_to 3 }
|
10
|
+
for_symbols("Pair") { shift_to 2 }
|
13
11
|
}
|
14
12
|
|
15
|
-
at_state(
|
16
|
-
for_symbols("
|
13
|
+
at_state(5) {
|
14
|
+
for_symbols("_End_", "s") { reduce_with "keyword_and_term" }
|
17
15
|
}
|
18
16
|
|
19
|
-
at_state(
|
20
|
-
for_symbols("s") { shift_to
|
21
|
-
for_symbols("_End_") { reduce_with "start" }
|
22
|
-
for_symbols("k") { shift_to 1 }
|
23
|
-
for_symbols("Pair") { shift_to 6 }
|
17
|
+
at_state(4) {
|
18
|
+
for_symbols("s") { shift_to 5 }
|
24
19
|
}
|
25
20
|
|
26
21
|
at_state(2) {
|
27
|
-
for_symbols("
|
22
|
+
for_symbols("_End_", "s") { reduce_with "multiple_pairs" }
|
28
23
|
}
|
29
24
|
|
30
25
|
at_state(0) {
|
31
|
-
for_symbols("
|
32
|
-
for_symbols("
|
33
|
-
for_symbols("
|
34
|
-
for_symbols("Pairs") { shift_to 5 }
|
26
|
+
for_symbols("Pair") { shift_to 6 }
|
27
|
+
for_symbols("s") { shift_to 3 }
|
28
|
+
for_symbols("Pairs") { shift_to 1 }
|
35
29
|
}
|
36
30
|
|
37
|
-
at_state(
|
38
|
-
for_symbols("
|
31
|
+
at_state(6) {
|
32
|
+
for_symbols("_End_", "s") { reduce_with "one_pair" }
|
33
|
+
}
|
34
|
+
|
35
|
+
at_state(3) {
|
36
|
+
for_symbols(":") { shift_to 4 }
|
37
|
+
for_symbols("_End_", "s") { reduce_with "default_keyword_term" }
|
39
38
|
}
|
40
39
|
|
41
40
|
end
|
@@ -1,105 +0,0 @@
|
|
1
|
-
module KeywordSearch
|
2
|
-
|
3
|
-
class Tokenizer < Dhaka::Tokenizer
|
4
|
-
|
5
|
-
def accumulator
|
6
|
-
@accumulator ||= ''
|
7
|
-
end
|
8
|
-
|
9
|
-
def accumulate(string)
|
10
|
-
accumulator << string
|
11
|
-
end
|
12
|
-
|
13
|
-
def clear_accumulator
|
14
|
-
@accumulator = ''
|
15
|
-
end
|
16
|
-
|
17
|
-
|
18
|
-
# TODO: Add further character support; this is just for initial release
|
19
|
-
letters = ('a'..'z').to_a + ('A'..'Z').to_a
|
20
|
-
numbers = ('0'..'9').to_a
|
21
|
-
extras = %w|_ - ' / \ [ ] { } 1 @ # $ % ^ & * ( ) . , ? < > |
|
22
|
-
printables = letters + numbers + extras
|
23
|
-
whitespace = [' ']
|
24
|
-
quotes = %w|' "|
|
25
|
-
keyword_separator = [':']
|
26
|
-
all_characters = keyword_separator + printables + whitespace + quotes
|
27
|
-
|
28
|
-
for_state :idle_state do
|
29
|
-
|
30
|
-
for_characters(printables) do
|
31
|
-
clear_accumulator
|
32
|
-
switch_to :unquoted_literal_state
|
33
|
-
end
|
34
|
-
|
35
|
-
for_characters(quotes) do
|
36
|
-
advance unless accumulator.empty?
|
37
|
-
clear_accumulator
|
38
|
-
case curr_char
|
39
|
-
when %<">
|
40
|
-
advance
|
41
|
-
switch_to :double_quoted_literal_state
|
42
|
-
when %<'>
|
43
|
-
advance
|
44
|
-
switch_to :single_quoted_literal_state
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
for_characters whitespace do
|
49
|
-
advance
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
|
54
|
-
for_state :unquoted_literal_state do
|
55
|
-
|
56
|
-
for_characters(printables) do
|
57
|
-
accumulate curr_char
|
58
|
-
advance
|
59
|
-
create_token('s', accumulator) unless curr_char
|
60
|
-
end
|
61
|
-
|
62
|
-
for_characters(keyword_separator) do
|
63
|
-
create_token 'k', accumulator
|
64
|
-
clear_accumulator
|
65
|
-
advance
|
66
|
-
switch_to :idle_state
|
67
|
-
end
|
68
|
-
|
69
|
-
for_characters(whitespace) do
|
70
|
-
create_token 's', accumulator
|
71
|
-
clear_accumulator
|
72
|
-
switch_to :idle_state
|
73
|
-
end
|
74
|
-
|
75
|
-
end
|
76
|
-
|
77
|
-
for_state :double_quoted_literal_state do
|
78
|
-
for_characters(all_characters - %w<">) do
|
79
|
-
accumulate curr_char
|
80
|
-
advance
|
81
|
-
end
|
82
|
-
for_characters %w<"> do
|
83
|
-
create_token 's', accumulator
|
84
|
-
clear_accumulator
|
85
|
-
advance
|
86
|
-
switch_to :idle_state
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
for_state :single_quoted_literal_state do
|
91
|
-
for_characters(all_characters - %w<'>) do
|
92
|
-
accumulate curr_char
|
93
|
-
advance
|
94
|
-
end
|
95
|
-
for_characters %w<'> do
|
96
|
-
create_token 's', accumulator
|
97
|
-
clear_accumulator
|
98
|
-
advance
|
99
|
-
switch_to :idle_state
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
end
|
104
|
-
|
105
|
-
end
|
data/test/test_keyword_search.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'test/unit'
|
2
|
-
require 'lib/keyword_search'
|
2
|
+
require File.dirname(__FILE__) + '/../lib/keyword_search'
|
3
3
|
|
4
4
|
class TestKeywordSearch < Test::Unit::TestCase
|
5
5
|
|
@@ -129,6 +129,17 @@ class TestKeywordSearch < Test::Unit::TestCase
|
|
129
129
|
assert_equal %<okay...>, result
|
130
130
|
end
|
131
131
|
|
132
|
+
def test_parse_error_results_in_exception
|
133
|
+
assert_raises(KeywordSearch::ParseError) do
|
134
|
+
KeywordSearch.search(%<we_do_not_allow:! or ::>) do |with|
|
135
|
+
with.default_keyword :text
|
136
|
+
with.keyword :text do |values|
|
137
|
+
result = values.first
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
132
143
|
end
|
133
144
|
|
134
145
|
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: keyword_search
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.
|
7
|
-
date: 2007-
|
6
|
+
version: 1.2.0
|
7
|
+
date: 2007-05-09 00:00:00 -06:00
|
8
8
|
summary: Generic support for extracting GMail-style search keywords/values from strings
|
9
9
|
require_paths:
|
10
10
|
- lib
|