keyword_search 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Manifest.txt +11 -0
- data/README.txt +96 -0
- data/Rakefile +25 -0
- data/lib/keyword_search.rb +25 -0
- data/lib/keyword_search/definition.rb +45 -0
- data/lib/keyword_search/evaluator.rb +33 -0
- data/lib/keyword_search/grammar.rb +21 -0
- data/lib/keyword_search/parser.rb +49 -0
- data/lib/keyword_search/tokenizer.rb +68 -0
- data/test/test_keyword_search.rb +70 -0
- metadata +64 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README.txt
|
4
|
+
Rakefile
|
5
|
+
lib/keyword_search.rb
|
6
|
+
lib/keyword_search/definition.rb
|
7
|
+
lib/keyword_search/evaluator.rb
|
8
|
+
lib/keyword_search/grammar.rb
|
9
|
+
lib/keyword_search/parser.rb
|
10
|
+
lib/keyword_search/tokenizer.rb
|
11
|
+
test/test_keyword_search.rb
|
data/README.txt
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
keyword_search
|
2
|
+
http://codefluency.rubyforge.org/keyword_search
|
3
|
+
by Bruce Williams
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Generic library to parse GMail-style search strings for keyword/value pairs; supports definition of valid keywords and handling of quoted values.
|
8
|
+
|
9
|
+
== FEATURES/PROBLEMS:
|
10
|
+
|
11
|
+
The library features a very simple, easy-to-use API.
|
12
|
+
* Define handlers for supported keywords with blocks
|
13
|
+
* Define the default keyword (values not part of a keyword/value pair)
|
14
|
+
|
15
|
+
Various notes:
|
16
|
+
* Quoted values are supported.
|
17
|
+
* Input is automatically downcased (both keywords and values should be assumed to be in lowercase)
|
18
|
+
|
19
|
+
Development Roadmap:
|
20
|
+
1.1:: Expand supported character set for keywords and values
|
21
|
+
(currently supports a-z, 0-9, '_', '-' and the apostrophe)
|
22
|
+
2.0:: Add negation and grouping (will break API backwards compatibility)
|
23
|
+
|
24
|
+
== SYNOPSIS:
|
25
|
+
|
26
|
+
Here's an example of usage from Rails.
|
27
|
+
* Note that the library is generic, and could presumably be used for any Ruby project.
|
28
|
+
|
29
|
+
# Some variables to build up
|
30
|
+
clauses = []
|
31
|
+
arguments = []
|
32
|
+
|
33
|
+
# Search a string, defining the supported keywords and building up
|
34
|
+
# the variables in the associated closures
|
35
|
+
KeywordSearch.search('account has:attachment since:2006-12-03') do |with|
|
36
|
+
|
37
|
+
with.default_keyword :title
|
38
|
+
|
39
|
+
with.keyword :title do |values|
|
40
|
+
clauses << "title like ?"
|
41
|
+
arguments << "%#{values.join(' ')}%"
|
42
|
+
end
|
43
|
+
|
44
|
+
with.keyword :has do |values|
|
45
|
+
clauses << 'has_attachment = true' if values.include?('attachment')
|
46
|
+
end
|
47
|
+
|
48
|
+
with.keyword :since do |values|
|
49
|
+
date = Date.parse(values.first) # only support one
|
50
|
+
clauses << 'created_on >= ?'
|
51
|
+
arguments << date.to_s
|
52
|
+
end
|
53
|
+
|
54
|
+
end
|
55
|
+
|
56
|
+
# Do our search with <tt>clauses</tt> and <tt>arguments</tt>
|
57
|
+
conditions = [clauses.map{|c| "(#{c})"}.join(' AND '), *arguments] # simplistic example
|
58
|
+
results = Message.find(:all, :conditions => conditions)
|
59
|
+
|
60
|
+
== REQUIREMENTS:
|
61
|
+
|
62
|
+
* dhaka
|
63
|
+
* hoe
|
64
|
+
|
65
|
+
== INSTALL:
|
66
|
+
|
67
|
+
sudo gem install keyword_search
|
68
|
+
|
69
|
+
== LICENSE:
|
70
|
+
|
71
|
+
(The MIT License)
|
72
|
+
|
73
|
+
Copyright (c) 2007 Bruce Williams
|
74
|
+
|
75
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
76
|
+
a copy of this software and associated documentation files (the
|
77
|
+
'Software'), to deal in the Software without restriction, including
|
78
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
79
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
80
|
+
permit persons to whom the Software is furnished to do so, subject to
|
81
|
+
the following conditions:
|
82
|
+
|
83
|
+
The above copyright notice and this permission notice shall be
|
84
|
+
included in all copies or substantial portions of the Software.
|
85
|
+
|
86
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
87
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
88
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
89
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
90
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
91
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
92
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
93
|
+
|
94
|
+
== LEGAL NOTES:
|
95
|
+
|
96
|
+
GMail is copyright Google, Inc.
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
require './lib/keyword_search.rb'
|
6
|
+
|
7
|
+
# Gem specification, declared through Hoe. The description, URL and change
# log are extracted from README.txt and History.txt rather than repeated here.
Hoe.new('keyword_search', KeywordSearch::VERSION) do |spec|
  spec.rubyforge_name = 'keyword_search'
  spec.summary = 'Generic support for extracting GMail-style search keywords/values from strings'
  spec.description = spec.paragraphs_of('README.txt', 2..5).join("\n\n")

  # The first README paragraph holds the project name followed by the URL
  # and author lines; everything after the name line is used as the URL.
  readme_header = spec.paragraphs_of('README.txt', 0).first
  spec.url = readme_header.split(/\n/)[1..-1]

  spec.author = 'Bruce Williams'
  spec.email = 'bruce@codefluency.com'
  spec.changes = spec.paragraphs_of('History.txt', 0..1).join("\n\n")
end
|
16
|
+
|
17
|
+
# Regenerates lib/keyword_search/parser.rb from KeywordSearch::Grammar.
# Run this after changing the grammar so the compiled parser stays in sync.
task :rebuild_parser do
  require 'dhaka'
  generator = Dhaka::Parser.new(KeywordSearch::Grammar)
  File.open('lib/keyword_search/parser.rb', 'w') { |out|
    out << generator.compile_to_ruby_source_as('KeywordSearch::Parser')
  }
end
|
24
|
+
|
25
|
+
# vim: syntax=Ruby
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'dhaka'
|
2
|
+
|
3
|
+
dirname = File.join(File.dirname(__FILE__), 'keyword_search')
|
4
|
+
%w|grammar tokenizer parser evaluator definition|.each do |dependency|
|
5
|
+
require File.join(dirname, dependency)
|
6
|
+
end
|
7
|
+
|
8
|
+
module KeywordSearch

  VERSION = '1.0.0'

  # Parses +input_string+ and dispatches every keyword/terms pair it
  # contains to +definition+.
  #
  # input_string:: the search string; it is downcased before tokenizing.
  # definition::   an object responding to <tt>handle(key, terms)</tt>. When
  #                omitted, a Definition is built and yielded to +block+.
  #
  # Returns nil without calling any handler when the parse result reports
  # an error; otherwise returns whatever iterating the evaluator result
  # returns.
  def self.search(input_string, definition=nil, &block)
    definition ||= Definition.new(&block)
    parse_result = Parser.parse(Tokenizer.tokenize(input_string.downcase))
    return if parse_result.has_error?

    pairs = Evaluator.new(parse_result.syntax_tree).result
    pairs.each { |key, terms| definition.handle(key, terms) }
  end

end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module KeywordSearch

  # Describes which keywords a search understands and what to do with the
  # values found for each of them.
  #
  #   definition = KeywordSearch::Definition.new do |with|
  #     with.default_keyword :title
  #     with.keyword(:title) { |values| ... }
  #   end
  class Definition

    # A single supported keyword: its name, an optional description and the
    # optional block invoked with the values collected for it.
    class Keyword

      attr_reader :name, :description

      # name::        keyword name (Symbol or String)
      # description:: optional human-readable description
      # handler::     optional block called with the collected values
      def initialize(name, description=nil, &handler)
        @name, @description = name, description
        @handler = handler
      end

      # Calls the handler with +value+ and returns its result. A keyword
      # registered without a block has no handler; it is ignored (nil is
      # returned) instead of failing with NoMethodError on nil.
      def handle(value)
        @handler.call(value) if @handler
      end

    end

    # The Keyword objects registered so far, in definition order.
    attr_reader :keywords

    # Creates an empty definition and, when a block is given, yields the
    # new instance so keywords can be declared on it.
    def initialize
      @keywords = []
      @default_keyword = nil
      yield self if block_given?
    end

    # Registers a keyword; +block+ receives the values found for it.
    def keyword(name, description=nil, &block)
      @keywords << Keyword.new(name, description, &block)
    end

    # Names the keyword that receives values reported under :default.
    def default_keyword(name)
      @default_keyword = name
    end

    # Dispatches +values+ to the first keyword registered under +key+.
    #
    # The key :default is mapped to the default keyword. Returns false when
    # that leaves no key at all, nil when no registered keyword matches, and
    # the handler's result otherwise.
    def handle(key, values)
      key = @default_keyword if key == :default
      return false unless key

      # Compare by string form so that names given as Strings and names
      # given as Symbols match each other.
      wanted = key.to_s
      match = @keywords.detect { |kw| kw.name.to_s == wanted }
      match.handle(values) if match
    end

  end

end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module KeywordSearch

  # Walks the syntax tree produced for KeywordSearch::Grammar and reduces it
  # to a single hash mapping each key to the array of terms given for it.
  # Terms without a keyword are collected under the :default key.
  class Evaluator < Dhaka::Evaluator

    self.grammar = Grammar

    define_evaluation_rules do

      # Pairs -> Pairs Pair: fold the new pair into the hash built so far.
      for_multiple_pairs do
        child_nodes[1].each do |key, terms|
          merged = child_nodes[0]
          merged[key] = (merged[key] || []) + terms
        end
        child_nodes[0]
      end

      # Pairs -> Pair: the pair's hash is the result as-is.
      for_one_pair { child_nodes[0] }

      # Pair -> k s: keyword token value mapped to a one-element term list.
      for_keyword_and_term do
        keyword_token = child_nodes[0].token
        term_token = child_nodes[1].token
        { keyword_token.value => [term_token.value] }
      end

      # Pair -> s: a bare term, filed under :default.
      for_default_keyword_term do
        { :default => [child_nodes[0].token.value] }
      end

    end

  end

end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module KeywordSearch

  # Grammar of a search string: one or more pairs, where a pair is either a
  # keyword token ('k') followed by a term token ('s'), or a lone term.
  # The production names are the ones KeywordSearch::Evaluator defines
  # rules for.
  class Grammar < Dhaka::Grammar

    for_symbol(Dhaka::START_SYMBOL_NAME) { start(%w[Pairs]) }

    for_symbol('Pairs') do
      one_pair(%w[Pair])
      multiple_pairs(%w[Pairs Pair])
    end

    for_symbol('Pair') do
      keyword_and_term(%w[k s])
      default_keyword_term(%w[s])
    end

  end

end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Compiled parser table for KeywordSearch::Grammar.
#
# The Rakefile's rebuild_parser task writes this file from
# Dhaka::Parser#compile_to_ruby_source_as, so changes belong in the grammar.
# States are listed in the order they were emitted.
class KeywordSearch::Parser < Dhaka::CompiledParser

  self.grammar = KeywordSearch::Grammar

  start_with 0

  at_state 3 do
    for_symbol('k') { reduce_with('default_keyword_term') }
    for_symbol('_End_') { reduce_with('default_keyword_term') }
    for_symbol('s') { reduce_with('default_keyword_term') }
  end

  at_state 0 do
    for_symbol('k') { shift_to(4) }
    for_symbol('Pairs') { shift_to(1) }
    for_symbol('s') { shift_to(3) }
    for_symbol('Pair') { shift_to(6) }
  end

  at_state 4 do
    for_symbol('s') { shift_to(5) }
  end

  at_state 5 do
    for_symbol('k') { reduce_with('keyword_and_term') }
    for_symbol('_End_') { reduce_with('keyword_and_term') }
    for_symbol('s') { reduce_with('keyword_and_term') }
  end

  at_state 1 do
    for_symbol('k') { shift_to(4) }
    for_symbol('_End_') { reduce_with('start') }
    for_symbol('s') { shift_to(3) }
    for_symbol('Pair') { shift_to(2) }
  end

  at_state 6 do
    for_symbol('k') { reduce_with('one_pair') }
    for_symbol('_End_') { reduce_with('one_pair') }
    for_symbol('s') { reduce_with('one_pair') }
  end

  at_state 2 do
    for_symbol('k') { reduce_with('multiple_pairs') }
    for_symbol('_End_') { reduce_with('multiple_pairs') }
    for_symbol('s') { reduce_with('multiple_pairs') }
  end

end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module KeywordSearch

  # Splits a search string into the 'k' (keyword) and 's' (term) tokens
  # used by KeywordSearch::Grammar.
  #
  # States:
  # * :idle_state             - between tokens; spaces are skipped
  # * :unquoted_literal_state - accumulating a bare word
  # * :quoted_literal_state   - accumulating text between double quotes
  class Tokenizer < Dhaka::Tokenizer

    # Character classes. Only lowercase letters are listed.
    word_characters = ('a'..'z').to_a + ('0'..'9').to_a + ['_', '-', "'"]
    blanks = [' ']
    double_quote = ['"']
    colon = [':']
    quotable = colon + word_characters + blanks

    for_state :idle_state do

      # Start of a bare word: reset the buffer but do not consume the
      # character; the unquoted state picks it up.
      for_characters word_characters do
        self.accumulator = ''
        switch_to :unquoted_literal_state
      end

      # Opening quote: reset the buffer and consume the quote itself.
      for_characters double_quote do
        self.accumulator = ''
        advance
        switch_to :quoted_literal_state
      end

      for_characters(blanks) { advance }

    end

    for_state :unquoted_literal_state do

      for_characters word_characters do
        self.accumulator = accumulator + curr_char
        advance
        # No current character after advancing: emit the pending word as a
        # term now.
        tokens << Dhaka::Token.new(Grammar.symbol_for_name('s'), accumulator) unless curr_char
      end

      # A colon ends the word and makes it a keyword.
      for_characters colon do
        tokens << Dhaka::Token.new(Grammar.symbol_for_name('k'), accumulator)
        advance
        switch_to :idle_state
      end

      # A space ends the word as a term; the space is left for the idle
      # state to consume.
      for_characters blanks do
        tokens << Dhaka::Token.new(Grammar.symbol_for_name('s'), accumulator)
        switch_to :idle_state
      end

    end

    for_state :quoted_literal_state do

      for_characters(quotable - double_quote) do
        self.accumulator = accumulator + curr_char
        advance
      end

      # Closing quote: emit everything collected since the opening quote
      # as a single term.
      for_characters double_quote do
        tokens << Dhaka::Token.new(Grammar.symbol_for_name('s'), accumulator)
        advance
        switch_to :idle_state
      end

    end

  end

end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'lib/keyword_search'
|
3
|
+
|
4
|
+
# Exercises KeywordSearch.search with quoted and unquoted terms, with and
# without a default keyword.
class TestKeywordSearch < Test::Unit::TestCase

  NAME_AND_AGE = 'bruce williams age:26'
  NAME_QUOTED_AND_AGE = '"bruce williams" age:26'
  NAME_AND_QUOTED_AGE = 'bruce williams age:"26"'
  DEFAULT_AGE_WITH_QUOTED_AGE = '26 name:"bruce williams"'

  # Bare terms are routed to the default keyword and collected together.
  def test_default_keyword
    captured = nil
    KeywordSearch.search(NAME_AND_AGE) do |with|
      with.default_keyword :name
      with.keyword(:name) { |values| captured = values.join(' ') }
    end
    assert_equal 'bruce williams', captured
  end

  # An unquoted value after "keyword:" reaches that keyword's handler.
  def test_unquoted_keyword_term
    captured = nil
    KeywordSearch.search(NAME_AND_AGE) do |with|
      with.keyword(:age) { |values| captured = Integer(values.first) }
    end
    assert_equal 26, captured
  end

  # A quoted phrase without a keyword goes to the default keyword.
  def test_quoted_default_keyword_term
    captured = nil
    KeywordSearch.search(NAME_QUOTED_AND_AGE) do |with|
      with.default_keyword :name
      with.keyword(:name) { |values| captured = values.join(' ') }
    end
    assert_equal 'bruce williams', captured
  end

  # A quoted value after "keyword:" is delivered without its quotes.
  def test_quoted_keyword_term
    captured = nil
    KeywordSearch.search(NAME_AND_QUOTED_AGE) do |with|
      with.keyword(:age) { |values| captured = Integer(values.first) }
    end
    assert_equal 26, captured
  end

  # A quoted value containing a space stays a single term.
  def test_quoted_keyword_term_with_whitespace
    captured = nil
    KeywordSearch.search(DEFAULT_AGE_WITH_QUOTED_AGE) do |with|
      with.default_keyword :age
      with.keyword(:name) { |values| captured = values.first }
    end
    assert_equal 'bruce williams', captured
  end

end
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
|
70
|
+
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.0
|
3
|
+
specification_version: 1
|
4
|
+
name: keyword_search
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2007-01-08 00:00:00 -07:00
|
8
|
+
summary: Generic support for extracting GMail-style search keywords/values from strings
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: bruce@codefluency.com
|
12
|
+
homepage: " http://codefluency.rubyforge.org/keyword_search"
|
13
|
+
rubyforge_project: keyword_search
|
14
|
+
description: "== FEATURES/PROBLEMS: The library features a very simple, easy-to-use API. * Define handlers for supported keywords with blocks * Define the default keyword (values not part of a keyword/value pair) Various notes: * Quoted values are supported. * Input is automatically downcased (both keywords and values should be assumed to be in lowercase) Development Roadmap: 1.1:: Expand supported character set for keywords and values (currently supports a-z) 2.0:: Add negation and grouping (will break API backwards compatibility) == SYNOPSIS:"
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Bruce Williams
|
31
|
+
files:
|
32
|
+
- History.txt
|
33
|
+
- Manifest.txt
|
34
|
+
- README.txt
|
35
|
+
- Rakefile
|
36
|
+
- lib/keyword_search.rb
|
37
|
+
- lib/keyword_search/definition.rb
|
38
|
+
- lib/keyword_search/evaluator.rb
|
39
|
+
- lib/keyword_search/grammar.rb
|
40
|
+
- lib/keyword_search/parser.rb
|
41
|
+
- lib/keyword_search/tokenizer.rb
|
42
|
+
- test/test_keyword_search.rb
|
43
|
+
test_files:
|
44
|
+
- test/test_keyword_search.rb
|
45
|
+
rdoc_options: []
|
46
|
+
|
47
|
+
extra_rdoc_files: []
|
48
|
+
|
49
|
+
executables: []
|
50
|
+
|
51
|
+
extensions: []
|
52
|
+
|
53
|
+
requirements: []
|
54
|
+
|
55
|
+
dependencies:
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: hoe
|
58
|
+
version_requirement:
|
59
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 1.1.6
|
64
|
+
version:
|