string_utility_belt 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/string_utility_belt.rb +1 -1
- data/lib/string_utility_belt/general.rb +23 -16
- data/lib/string_utility_belt/match_rank.rb +82 -25
- data/lib/string_utility_belt/regex_me.rb +1 -0
- data/lib/string_utility_belt/regex_me/lib/builders.rb +99 -0
- data/lib/string_utility_belt/regex_me/lib/configurations.rb +10 -0
- data/lib/string_utility_belt/regex_me/lib/helpers.rb +102 -0
- data/lib/string_utility_belt/regex_me/regex_me.rb +5 -0
- data/lib/string_utility_belt/version.rb +1 -1
- metadata +28 -10
- data/lib/string_utility_belt/regex_me_helper.rb +0 -100
- data/lib/string_utility_belt/regex_me_to_search.rb +0 -107
data/lib/string_utility_belt.rb
CHANGED
@@ -1,41 +1,48 @@
|
|
1
|
-
require 'string_utility_belt/
|
1
|
+
require 'string_utility_belt/regex_me'
|
2
2
|
|
3
3
|
module StringUtilityBelt
|
4
4
|
module General
|
5
5
|
class GENERAL
|
6
6
|
CASE_INSENSITIVE_OPT = {:case_insensitive => true}
|
7
7
|
|
8
|
-
def
|
9
|
-
@string
|
8
|
+
def initialize(string)
|
9
|
+
@string = string
|
10
|
+
end
|
11
|
+
|
12
|
+
def have_this_words?(words_to_match, options)
|
10
13
|
@arguments = options
|
11
14
|
|
12
15
|
for word in words_to_match
|
13
|
-
return false if
|
16
|
+
return false if not word_is_found_in_the_string?(word)
|
14
17
|
end
|
15
18
|
|
16
19
|
return true
|
17
20
|
end
|
18
21
|
|
19
22
|
private
|
20
|
-
def
|
21
|
-
@string
|
23
|
+
def word_is_found_in_the_string?(word)
|
24
|
+
@string =~ word.regex_me_to_search_ruby(arguments)
|
22
25
|
end
|
23
26
|
|
24
27
|
def arguments
|
25
|
-
if
|
28
|
+
if args_is_nil? or args_is_boolean?
|
26
29
|
CASE_INSENSITIVE_OPT.merge({:exact_word => @arguments})
|
27
|
-
elsif
|
30
|
+
elsif args_is_hash?
|
28
31
|
@arguments.merge(CASE_INSENSITIVE_OPT)
|
29
32
|
end
|
30
33
|
end
|
31
34
|
|
32
|
-
def
|
35
|
+
def args_is_boolean?
|
33
36
|
@arguments.instance_of?(FalseClass) || @arguments.instance_of?(TrueClass)
|
34
37
|
end
|
35
38
|
|
36
|
-
def
|
39
|
+
def args_is_hash?
|
37
40
|
@arguments.instance_of?(Hash)
|
38
41
|
end
|
42
|
+
|
43
|
+
def args_is_nil?
|
44
|
+
@arguments.nil?
|
45
|
+
end
|
39
46
|
end
|
40
47
|
|
41
48
|
WORD_PATTERN = /\w[\w\'\-]*/
|
@@ -55,14 +62,14 @@ module StringUtilityBelt
|
|
55
62
|
self.gsub!(ANY_SPACE_PATTERN, SIMPLE_SPACE)
|
56
63
|
end
|
57
64
|
|
58
|
-
def have_this_words?(words_to_match, options =
|
59
|
-
i = GENERAL.new
|
60
|
-
i.have_this_words?(
|
65
|
+
def have_this_words?(words_to_match, options = nil)
|
66
|
+
i = GENERAL.new(self)
|
67
|
+
i.have_this_words?(words_to_match, options)
|
61
68
|
end
|
62
69
|
|
63
|
-
def not_have_this_words?(words_to_match, options =
|
64
|
-
i = GENERAL.new
|
65
|
-
!i.have_this_words?(
|
70
|
+
def not_have_this_words?(words_to_match, options = nil)
|
71
|
+
i = GENERAL.new(self)
|
72
|
+
!i.have_this_words?(words_to_match, options)
|
66
73
|
end
|
67
74
|
end
|
68
75
|
end
|
@@ -1,46 +1,103 @@
|
|
1
|
-
require 'string_utility_belt/
|
1
|
+
require 'string_utility_belt/regex_me'
|
2
2
|
|
3
3
|
module StringUtilityBelt
|
4
4
|
module MatchRank
|
5
|
+
class MATCHRANK
|
6
|
+
TOTAL_TEMPLATE = {:exact => 0, :matched => 0}
|
7
|
+
SCORE_TEMPLATE = TOTAL_TEMPLATE.merge({:precision => 0})
|
5
8
|
|
6
|
-
|
7
|
-
|
8
|
-
freq[:exact] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
|
9
|
-
freq[:matched] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
|
9
|
+
def initialize(string)
|
10
|
+
@text = string
|
10
11
|
end
|
11
|
-
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
def frequency(options)
|
14
|
+
@options = options
|
15
|
+
@result = result_template
|
16
|
+
|
17
|
+
for @search_word in search_words
|
18
|
+
for @text_word in @text.words
|
19
|
+
count_exact_matches
|
20
|
+
count_fragmet_matches
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
calculate_the_precision if @options[:template] == :precision
|
25
|
+
|
26
|
+
return @result
|
17
27
|
end
|
18
|
-
end
|
19
28
|
|
20
|
-
|
21
|
-
|
22
|
-
|
29
|
+
private
|
30
|
+
|
31
|
+
def search_words
|
32
|
+
words = @options[:search_words]
|
33
|
+
words.instance_of?(Array) ? words : words.to_s.words
|
34
|
+
end
|
23
35
|
|
24
|
-
|
36
|
+
def count_exact_matches
|
37
|
+
incr_result(:exact) if find_the_wanted_word(:with_precision => true)
|
38
|
+
end
|
25
39
|
|
26
|
-
|
27
|
-
|
40
|
+
def count_fragmet_matches
|
41
|
+
incr_result(:matched) if find_the_wanted_word(:with_precision => false)
|
42
|
+
end
|
28
43
|
|
29
|
-
|
44
|
+
def calculate_the_precision
|
45
|
+
@result[:precision] = (@result[:exact].to_f / @result[:matched].to_f) * 100
|
46
|
+
end
|
47
|
+
|
48
|
+
def find_the_wanted_word(option)
|
49
|
+
@text_word =~ matcher(option[:with_precision])
|
50
|
+
end
|
30
51
|
|
31
|
-
|
32
|
-
|
33
|
-
|
52
|
+
def matcher(precision)
|
53
|
+
@search_word.regex_me_to_search_ruby(:exact_word => precision,
|
54
|
+
:case_insensitive => true)
|
55
|
+
end
|
34
56
|
|
35
|
-
|
36
|
-
|
37
|
-
|
57
|
+
def incr_result(key)
|
58
|
+
case @options[:template]
|
59
|
+
when :grouped_words
|
60
|
+
begin
|
61
|
+
@result[key][@search_word] += 1
|
62
|
+
rescue
|
63
|
+
@result[key][@search_word] = 0
|
64
|
+
@result[key][@search_word] += 1
|
65
|
+
end
|
66
|
+
else
|
67
|
+
@result[key] += 1
|
38
68
|
end
|
39
69
|
end
|
40
70
|
|
41
|
-
|
71
|
+
def result_template
|
72
|
+
templates.fetch(@options[:template]).clone
|
73
|
+
end
|
74
|
+
|
75
|
+
def templates
|
76
|
+
{
|
77
|
+
:total => TOTAL_TEMPLATE,
|
78
|
+
:precision => SCORE_TEMPLATE,
|
79
|
+
:grouped_words => {:exact => {}, :matched => {}}
|
80
|
+
}
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def total_frequency_by(words)
|
85
|
+
measure.frequency(:template => :total, :search_words => words)
|
42
86
|
end
|
43
87
|
|
88
|
+
def words_frequency_by(words)
|
89
|
+
measure.frequency(:template => :grouped_words, :search_words => words)
|
90
|
+
end
|
91
|
+
|
92
|
+
def match_and_score_by(words)
|
93
|
+
measure.frequency(:template => :precision, :search_words => words)
|
94
|
+
end
|
95
|
+
|
96
|
+
private
|
97
|
+
|
98
|
+
def measure
|
99
|
+
MATCHRANK.new(self)
|
100
|
+
end
|
44
101
|
end
|
45
102
|
end
|
46
103
|
|
@@ -0,0 +1 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/regex_me/regex_me'
|
@@ -0,0 +1,99 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/configurations'
|
2
|
+
require File.dirname(__FILE__) + '/helpers'
|
3
|
+
|
4
|
+
module RegexMe
|
5
|
+
module Builders
|
6
|
+
include RegexMe::Configurations::Builder
|
7
|
+
|
8
|
+
def regex_me_to_search_ruby(options = {})
|
9
|
+
regex_me_to_search(:ruby, options)
|
10
|
+
end
|
11
|
+
|
12
|
+
def regex_me_to_search_mysql(options = {})
|
13
|
+
regex_me_to_search(:mysql, options)
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def options_handler(options)
|
19
|
+
handled = \
|
20
|
+
{:case_insensitive => (options[:case_insensitive] ? Regexp::IGNORECASE : nil ),
|
21
|
+
:multiline => (options[:multiline] ? Regexp::MULTILINE : nil ),
|
22
|
+
:or => (options[:or] == false ? false : true)}
|
23
|
+
|
24
|
+
return options.merge(handled)
|
25
|
+
end
|
26
|
+
|
27
|
+
def regex_me_to_search(env, options)
|
28
|
+
return EMPTYs[env] if self.strip.empty?
|
29
|
+
|
30
|
+
execute_builder(env, options)
|
31
|
+
end
|
32
|
+
|
33
|
+
def execute_builder(env, options)
|
34
|
+
opt_handled = options_handler(options)
|
35
|
+
|
36
|
+
builder_result = builder(env, opt_handled)
|
37
|
+
|
38
|
+
case env
|
39
|
+
when :ruby
|
40
|
+
options = [opt_handled[:case_insensitive], opt_handled[:multiline]].compact
|
41
|
+
Regexp.new(builder_result, *options)
|
42
|
+
when :mysql
|
43
|
+
builder_result
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def builder(border_to, options)
|
48
|
+
string = self
|
49
|
+
|
50
|
+
lcv = options[:latin_chars_variations]
|
51
|
+
|
52
|
+
if options[:exact_phrase]
|
53
|
+
@regexp = \
|
54
|
+
string \
|
55
|
+
.strip.simple_space \
|
56
|
+
.regex_latin_ci_list \
|
57
|
+
.gsub(/\s/, WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES) \
|
58
|
+
.regex_builder(:or => false,
|
59
|
+
:border => {:to => border_to,
|
60
|
+
:direction => :both})
|
61
|
+
else
|
62
|
+
@regexp = '('
|
63
|
+
|
64
|
+
for word in string.strip.split
|
65
|
+
if options[:exact_word]
|
66
|
+
@regexp << word.regex_builder(:border => {:to => border_to, :direction => :both}, :latin_chars_variations => lcv, :or => true)
|
67
|
+
elsif have_the_any_char?(word)
|
68
|
+
@regexp << word.regex_builder(:any => true, :border => border(border_to, word) , :latin_chars_variations => lcv, :or => true)
|
69
|
+
else
|
70
|
+
@regexp << word.regex_builder(:latin_chars_variations => lcv, :or => true)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
@regexp = (@regexp << ')').sub!(/\|\)/,')')
|
75
|
+
end
|
76
|
+
|
77
|
+
return @regexp
|
78
|
+
end
|
79
|
+
|
80
|
+
def have_the_any_char?(string)
|
81
|
+
string.include?('*')
|
82
|
+
end
|
83
|
+
|
84
|
+
def border(to, word)
|
85
|
+
direction = nil
|
86
|
+
|
87
|
+
case word
|
88
|
+
when/^\*/
|
89
|
+
direction = :right
|
90
|
+
when /\*$/
|
91
|
+
direction = :left
|
92
|
+
when /^.*\*.*$/
|
93
|
+
direction = :both
|
94
|
+
end
|
95
|
+
|
96
|
+
{:to => to, :direction => direction}
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module StringUtilityBelt
|
4
|
+
module RegexMe
|
5
|
+
module Helper
|
6
|
+
A_VARIATIONS = "(a|à|á|â|ã|ä)"
|
7
|
+
E_VARIATIONS = "(e|è|é|ê|ë)"
|
8
|
+
I_VARIATIONS = "(i|ì|í|î|ï)"
|
9
|
+
O_VARIATIONS = "(o|ò|ó|ô|õ|ö)"
|
10
|
+
U_VARIATIONS = "(u|ù|ú|û|ü)"
|
11
|
+
C_VARIATIONS = "(c|ç)"
|
12
|
+
N_VARIATIONS = "(n|ñ)"
|
13
|
+
|
14
|
+
LATIN_CHARS_VARIATIONS = [A_VARIATIONS,
|
15
|
+
E_VARIATIONS,
|
16
|
+
I_VARIATIONS,
|
17
|
+
O_VARIATIONS,
|
18
|
+
U_VARIATIONS,
|
19
|
+
C_VARIATIONS,
|
20
|
+
N_VARIATIONS]
|
21
|
+
|
22
|
+
BORDER_TO = {
|
23
|
+
:ruby => {:left => '\b', :right => '\b' },
|
24
|
+
:mysql => {:left => '[[:<:]]', :right => '[[:>:]]' }
|
25
|
+
}
|
26
|
+
|
27
|
+
def regex_latin_ci_list
|
28
|
+
memo = ""
|
29
|
+
|
30
|
+
self.each_char do |char|
|
31
|
+
changed = false
|
32
|
+
|
33
|
+
for variations in LATIN_CHARS_VARIATIONS
|
34
|
+
variations_pattern = Regexp.new(variations, Regexp::IGNORECASE)
|
35
|
+
|
36
|
+
if char =~ variations_pattern
|
37
|
+
changed = true
|
38
|
+
memo.insert(-1, variations)
|
39
|
+
break
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
memo.insert(-1, char) unless changed
|
44
|
+
end
|
45
|
+
|
46
|
+
self.replace(memo)
|
47
|
+
end
|
48
|
+
|
49
|
+
def regex_builder(options)
|
50
|
+
if options[:any]
|
51
|
+
replace_the_any_char_per_any_pattern
|
52
|
+
end
|
53
|
+
|
54
|
+
if options[:latin_chars_variations]
|
55
|
+
replace_chars_includeds_in_latin_variation_list
|
56
|
+
end
|
57
|
+
|
58
|
+
if options[:border]
|
59
|
+
insert_border(options[:border])
|
60
|
+
end
|
61
|
+
|
62
|
+
if options[:or]
|
63
|
+
insert_OR
|
64
|
+
end
|
65
|
+
|
66
|
+
return self
|
67
|
+
end
|
68
|
+
|
69
|
+
private
|
70
|
+
def replace_the_any_char_per_any_pattern
|
71
|
+
self.gsub!(/\*/, '.*')
|
72
|
+
end
|
73
|
+
|
74
|
+
def replace_chars_includeds_in_latin_variation_list
|
75
|
+
self.regex_latin_ci_list
|
76
|
+
end
|
77
|
+
|
78
|
+
def insert_border(options)
|
79
|
+
border = BORDER_TO[options[:to]]
|
80
|
+
|
81
|
+
case options[:direction]
|
82
|
+
when :left
|
83
|
+
self.insert(0, border[:left])
|
84
|
+
when :right
|
85
|
+
self.insert(-1, border[:right])
|
86
|
+
when :both
|
87
|
+
self.insert(0, border[:left]).insert(-1, border[:right])
|
88
|
+
else
|
89
|
+
self
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def insert_OR
|
94
|
+
self.insert(-1, "|")
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
class String
|
101
|
+
include StringUtilityBelt::RegexMe::Helper
|
102
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_utility_belt
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
-
|
10
|
-
version: 0.3.
|
9
|
+
- 1
|
10
|
+
version: 0.3.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Rodrigo Serradura
|
@@ -15,10 +15,25 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-07-
|
19
|
-
dependencies:
|
20
|
-
|
21
|
-
|
18
|
+
date: 2011-07-12 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: htmlentities
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - "="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 51
|
29
|
+
segments:
|
30
|
+
- 4
|
31
|
+
- 3
|
32
|
+
- 0
|
33
|
+
version: 4.3.0
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
description: Adds new features for String objects.
|
22
37
|
email:
|
23
38
|
- rserradura@gmail.com
|
24
39
|
executables: []
|
@@ -37,8 +52,11 @@ files:
|
|
37
52
|
- lib/string_utility_belt/entities.rb
|
38
53
|
- lib/string_utility_belt/general.rb
|
39
54
|
- lib/string_utility_belt/match_rank.rb
|
40
|
-
- lib/string_utility_belt/
|
41
|
-
- lib/string_utility_belt/
|
55
|
+
- lib/string_utility_belt/regex_me.rb
|
56
|
+
- lib/string_utility_belt/regex_me/lib/builders.rb
|
57
|
+
- lib/string_utility_belt/regex_me/lib/configurations.rb
|
58
|
+
- lib/string_utility_belt/regex_me/lib/helpers.rb
|
59
|
+
- lib/string_utility_belt/regex_me/regex_me.rb
|
42
60
|
- lib/string_utility_belt/tags.rb
|
43
61
|
- lib/string_utility_belt/version.rb
|
44
62
|
- test/string_utility_belt/entities_test.rb
|
@@ -80,6 +98,6 @@ rubyforge_project: string_utility_belt
|
|
80
98
|
rubygems_version: 1.8.2
|
81
99
|
signing_key:
|
82
100
|
specification_version: 3
|
83
|
-
summary:
|
101
|
+
summary: Useful methods to handle strings
|
84
102
|
test_files: []
|
85
103
|
|
@@ -1,100 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
|
3
|
-
module RegexMe
|
4
|
-
module Helper
|
5
|
-
A_VARIATIONS = "(a|à|á|â|ã|ä)"
|
6
|
-
E_VARIATIONS = "(e|è|é|ê|ë)"
|
7
|
-
I_VARIATIONS = "(i|ì|í|î|ï)"
|
8
|
-
O_VARIATIONS = "(o|ò|ó|ô|õ|ö)"
|
9
|
-
U_VARIATIONS = "(u|ù|ú|û|ü)"
|
10
|
-
C_VARIATIONS = "(c|ç)"
|
11
|
-
N_VARIATIONS = "(n|ñ)"
|
12
|
-
|
13
|
-
LATIN_CHARS_VARIATIONS = [A_VARIATIONS,
|
14
|
-
E_VARIATIONS,
|
15
|
-
I_VARIATIONS,
|
16
|
-
O_VARIATIONS,
|
17
|
-
U_VARIATIONS,
|
18
|
-
C_VARIATIONS,
|
19
|
-
N_VARIATIONS]
|
20
|
-
|
21
|
-
BORDER_TO = {
|
22
|
-
:ruby => {:left => '\b', :right => '\b' },
|
23
|
-
:mysql => {:left => '[[:<:]]', :right => '[[:>:]]' }
|
24
|
-
}
|
25
|
-
|
26
|
-
def regex_latin_ci_list
|
27
|
-
memo = ""
|
28
|
-
|
29
|
-
self.each_char do |char|
|
30
|
-
changed = false
|
31
|
-
|
32
|
-
for variations in LATIN_CHARS_VARIATIONS
|
33
|
-
variations_pattern = Regexp.new(variations, Regexp::IGNORECASE)
|
34
|
-
|
35
|
-
if char =~ variations_pattern
|
36
|
-
changed = true
|
37
|
-
memo.insert(-1, variations)
|
38
|
-
break
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
memo.insert(-1, char) unless changed
|
43
|
-
end
|
44
|
-
|
45
|
-
self.replace(memo)
|
46
|
-
end
|
47
|
-
|
48
|
-
def regex_builder(options)
|
49
|
-
if options[:any]
|
50
|
-
replace_the_any_char_per_any_pattern
|
51
|
-
end
|
52
|
-
|
53
|
-
if options[:latin_chars_variations]
|
54
|
-
replace_chars_includeds_in_latin_variation_list
|
55
|
-
end
|
56
|
-
|
57
|
-
if options[:border]
|
58
|
-
insert_border(options[:border])
|
59
|
-
end
|
60
|
-
|
61
|
-
if options[:or]
|
62
|
-
insert_OR
|
63
|
-
end
|
64
|
-
|
65
|
-
return self
|
66
|
-
end
|
67
|
-
|
68
|
-
private
|
69
|
-
def replace_the_any_char_per_any_pattern
|
70
|
-
self.gsub!(/\*/, '.*')
|
71
|
-
end
|
72
|
-
|
73
|
-
def replace_chars_includeds_in_latin_variation_list
|
74
|
-
self.regex_latin_ci_list
|
75
|
-
end
|
76
|
-
|
77
|
-
def insert_border(options)
|
78
|
-
border = BORDER_TO[options[:to]]
|
79
|
-
|
80
|
-
case options[:direction]
|
81
|
-
when :left
|
82
|
-
self.insert(0, border[:left])
|
83
|
-
when :right
|
84
|
-
self.insert(-1, border[:right])
|
85
|
-
when :both
|
86
|
-
self.insert(0, border[:left]).insert(-1, border[:right])
|
87
|
-
else
|
88
|
-
self
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
def insert_OR
|
93
|
-
self.insert(-1, "|")
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
class String
|
99
|
-
include RegexMe::Helper
|
100
|
-
end
|
@@ -1,107 +0,0 @@
|
|
1
|
-
require 'string_utility_belt/regex_me_helper'
|
2
|
-
|
3
|
-
module StringUtilityBelt
|
4
|
-
module RegexMe
|
5
|
-
EMPTYs = {:ruby => //, :mysql => ''}
|
6
|
-
WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES = '[^0-9a-zA-Z\_]+'
|
7
|
-
|
8
|
-
module To
|
9
|
-
module Search
|
10
|
-
def regex_me_to_search_ruby(options = {})
|
11
|
-
regex_me_to_search(:ruby, options)
|
12
|
-
end
|
13
|
-
|
14
|
-
def regex_me_to_search_mysql(options = {})
|
15
|
-
regex_me_to_search(:mysql, options)
|
16
|
-
end
|
17
|
-
|
18
|
-
private
|
19
|
-
|
20
|
-
def options_handler(options)
|
21
|
-
handled = \
|
22
|
-
{:case_insensitive => (options[:case_insensitive] ? Regexp::IGNORECASE : nil ),
|
23
|
-
:multiline => (options[:multiline] ? Regexp::MULTILINE : nil ),
|
24
|
-
:or => (options[:or] == false ? false : true)}
|
25
|
-
|
26
|
-
return options.merge(handled)
|
27
|
-
end
|
28
|
-
|
29
|
-
def regex_me_to_search(env, options)
|
30
|
-
return EMPTYs[env] if self.strip.empty?
|
31
|
-
|
32
|
-
execute_builder(env, options)
|
33
|
-
end
|
34
|
-
|
35
|
-
def execute_builder(env, options)
|
36
|
-
opt_handled = options_handler(options)
|
37
|
-
|
38
|
-
builder_result = builder(env, opt_handled)
|
39
|
-
|
40
|
-
case env
|
41
|
-
when :ruby
|
42
|
-
options = [opt_handled[:case_insensitive], opt_handled[:multiline]].compact
|
43
|
-
Regexp.new(builder_result, *options)
|
44
|
-
when :mysql
|
45
|
-
builder_result
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def builder(border_to, options)
|
50
|
-
string = self
|
51
|
-
|
52
|
-
lcv = options[:latin_chars_variations]
|
53
|
-
|
54
|
-
if options[:exact_phrase]
|
55
|
-
@regexp = \
|
56
|
-
string \
|
57
|
-
.strip.simple_space \
|
58
|
-
.regex_latin_ci_list \
|
59
|
-
.gsub(/\s/, WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES) \
|
60
|
-
.regex_builder(:or => false,
|
61
|
-
:border => {:to => border_to,
|
62
|
-
:direction => :both})
|
63
|
-
else
|
64
|
-
@regexp = '('
|
65
|
-
|
66
|
-
for word in string.strip.split
|
67
|
-
if options[:exact_word]
|
68
|
-
@regexp << word.regex_builder(:border => {:to => border_to, :direction => :both}, :latin_chars_variations => lcv, :or => true)
|
69
|
-
elsif have_the_any_char?(word)
|
70
|
-
@regexp << word.regex_builder(:any => true, :border => border(border_to, word) , :latin_chars_variations => lcv, :or => true)
|
71
|
-
else
|
72
|
-
@regexp << word.regex_builder(:latin_chars_variations => lcv, :or => true)
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
@regexp = (@regexp << ')').sub!(/\|\)/,')')
|
77
|
-
end
|
78
|
-
|
79
|
-
return @regexp
|
80
|
-
end
|
81
|
-
|
82
|
-
def have_the_any_char?(string)
|
83
|
-
string.include?('*')
|
84
|
-
end
|
85
|
-
|
86
|
-
def border(to, word)
|
87
|
-
direction = nil
|
88
|
-
|
89
|
-
case word
|
90
|
-
when/^\*/
|
91
|
-
direction = :right
|
92
|
-
when /\*$/
|
93
|
-
direction = :left
|
94
|
-
when /^.*\*.*$/
|
95
|
-
direction = :both
|
96
|
-
end
|
97
|
-
|
98
|
-
{:to => to, :direction => direction}
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
class String
|
106
|
-
include StringUtilityBelt::RegexMe::To::Search
|
107
|
-
end
|