string_utility_belt 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/string_utility_belt.rb +1 -1
- data/lib/string_utility_belt/general.rb +23 -16
- data/lib/string_utility_belt/match_rank.rb +82 -25
- data/lib/string_utility_belt/regex_me.rb +1 -0
- data/lib/string_utility_belt/regex_me/lib/builders.rb +99 -0
- data/lib/string_utility_belt/regex_me/lib/configurations.rb +10 -0
- data/lib/string_utility_belt/regex_me/lib/helpers.rb +102 -0
- data/lib/string_utility_belt/regex_me/regex_me.rb +5 -0
- data/lib/string_utility_belt/version.rb +1 -1
- metadata +28 -10
- data/lib/string_utility_belt/regex_me_helper.rb +0 -100
- data/lib/string_utility_belt/regex_me_to_search.rb +0 -107
data/lib/string_utility_belt.rb
CHANGED
@@ -1,41 +1,48 @@
|
|
1
|
-
require 'string_utility_belt/
|
1
|
+
require 'string_utility_belt/regex_me'
|
2
2
|
|
3
3
|
module StringUtilityBelt
|
4
4
|
module General
|
5
5
|
class GENERAL
|
6
6
|
CASE_INSENSITIVE_OPT = {:case_insensitive => true}
|
7
7
|
|
8
|
-
def
|
9
|
-
@string
|
8
|
+
def initialize(string)
|
9
|
+
@string = string
|
10
|
+
end
|
11
|
+
|
12
|
+
def have_this_words?(words_to_match, options)
|
10
13
|
@arguments = options
|
11
14
|
|
12
15
|
for word in words_to_match
|
13
|
-
return false if
|
16
|
+
return false if not word_is_found_in_the_string?(word)
|
14
17
|
end
|
15
18
|
|
16
19
|
return true
|
17
20
|
end
|
18
21
|
|
19
22
|
private
|
20
|
-
def
|
21
|
-
@string
|
23
|
+
def word_is_found_in_the_string?(word)
|
24
|
+
@string =~ word.regex_me_to_search_ruby(arguments)
|
22
25
|
end
|
23
26
|
|
24
27
|
def arguments
|
25
|
-
if
|
28
|
+
if args_is_nil? or args_is_boolean?
|
26
29
|
CASE_INSENSITIVE_OPT.merge({:exact_word => @arguments})
|
27
|
-
elsif
|
30
|
+
elsif args_is_hash?
|
28
31
|
@arguments.merge(CASE_INSENSITIVE_OPT)
|
29
32
|
end
|
30
33
|
end
|
31
34
|
|
32
|
-
def
|
35
|
+
def args_is_boolean?
|
33
36
|
@arguments.instance_of?(FalseClass) || @arguments.instance_of?(TrueClass)
|
34
37
|
end
|
35
38
|
|
36
|
-
def
|
39
|
+
def args_is_hash?
|
37
40
|
@arguments.instance_of?(Hash)
|
38
41
|
end
|
42
|
+
|
43
|
+
def args_is_nil?
|
44
|
+
@arguments.nil?
|
45
|
+
end
|
39
46
|
end
|
40
47
|
|
41
48
|
WORD_PATTERN = /\w[\w\'\-]*/
|
@@ -55,14 +62,14 @@ module StringUtilityBelt
|
|
55
62
|
self.gsub!(ANY_SPACE_PATTERN, SIMPLE_SPACE)
|
56
63
|
end
|
57
64
|
|
58
|
-
def have_this_words?(words_to_match, options =
|
59
|
-
i = GENERAL.new
|
60
|
-
i.have_this_words?(
|
65
|
+
def have_this_words?(words_to_match, options = nil)
|
66
|
+
i = GENERAL.new(self)
|
67
|
+
i.have_this_words?(words_to_match, options)
|
61
68
|
end
|
62
69
|
|
63
|
-
def not_have_this_words?(words_to_match, options =
|
64
|
-
i = GENERAL.new
|
65
|
-
!i.have_this_words?(
|
70
|
+
def not_have_this_words?(words_to_match, options = nil)
|
71
|
+
i = GENERAL.new(self)
|
72
|
+
!i.have_this_words?(words_to_match, options)
|
66
73
|
end
|
67
74
|
end
|
68
75
|
end
|
@@ -1,46 +1,103 @@
|
|
1
|
-
require 'string_utility_belt/
|
1
|
+
require 'string_utility_belt/regex_me'
|
2
2
|
|
3
3
|
module StringUtilityBelt
|
4
4
|
module MatchRank
|
5
|
+
class MATCHRANK
|
6
|
+
TOTAL_TEMPLATE = {:exact => 0, :matched => 0}
|
7
|
+
SCORE_TEMPLATE = TOTAL_TEMPLATE.merge({:precision => 0})
|
5
8
|
|
6
|
-
|
7
|
-
|
8
|
-
freq[:exact] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
|
9
|
-
freq[:matched] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
|
9
|
+
def initialize(string)
|
10
|
+
@text = string
|
10
11
|
end
|
11
|
-
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
def frequency(options)
|
14
|
+
@options = options
|
15
|
+
@result = result_template
|
16
|
+
|
17
|
+
for @search_word in search_words
|
18
|
+
for @text_word in @text.words
|
19
|
+
count_exact_matches
|
20
|
+
count_fragmet_matches
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
calculate_the_precision if @options[:template] == :precision
|
25
|
+
|
26
|
+
return @result
|
17
27
|
end
|
18
|
-
end
|
19
28
|
|
20
|
-
|
21
|
-
|
22
|
-
|
29
|
+
private
|
30
|
+
|
31
|
+
def search_words
|
32
|
+
words = @options[:search_words]
|
33
|
+
words.instance_of?(Array) ? words : words.to_s.words
|
34
|
+
end
|
23
35
|
|
24
|
-
|
36
|
+
def count_exact_matches
|
37
|
+
incr_result(:exact) if find_the_wanted_word(:with_precision => true)
|
38
|
+
end
|
25
39
|
|
26
|
-
|
27
|
-
|
40
|
+
def count_fragmet_matches
|
41
|
+
incr_result(:matched) if find_the_wanted_word(:with_precision => false)
|
42
|
+
end
|
28
43
|
|
29
|
-
|
44
|
+
# Stores in @result[:precision] the percentage of exact matches among all
# matches.
#
# Reads @result[:exact] and @result[:matched] and writes @result[:precision].
# When nothing matched, the precision is 0.0 rather than the NaN that the
# float division 0.0 / 0.0 would yield.
#
# Returns the Float that was stored.
def calculate_the_precision
  matched = @result[:matched].to_f

  @result[:precision] =
    matched.zero? ? 0.0 : (@result[:exact].to_f / matched) * 100
end
|
47
|
+
|
48
|
+
def find_the_wanted_word(option)
|
49
|
+
@text_word =~ matcher(option[:with_precision])
|
50
|
+
end
|
30
51
|
|
31
|
-
|
32
|
-
|
33
|
-
|
52
|
+
def matcher(precision)
|
53
|
+
@search_word.regex_me_to_search_ruby(:exact_word => precision,
|
54
|
+
:case_insensitive => true)
|
55
|
+
end
|
34
56
|
|
35
|
-
|
36
|
-
|
37
|
-
|
57
|
+
# Adds one to the counter stored under +key+ in @result.
#
# With the :grouped_words template, @result[key] is a hash of per-word
# counters indexed by the current @search_word; a word counted for the first
# time starts from zero. For every other template @result[key] is a single
# integer counter.
#
# The missing-counter case is handled explicitly rather than by rescuing the
# error raised by nil + 1, so unrelated failures are no longer swallowed.
#
# Returns the new counter value.
def incr_result(key)
  if @options[:template] == :grouped_words
    per_word = @result[key]
    per_word[@search_word] = (per_word[@search_word] || 0) + 1
  else
    @result[key] += 1
  end
end
|
40
70
|
|
41
|
-
|
71
|
+
def result_template
|
72
|
+
templates.fetch(@options[:template]).clone
|
73
|
+
end
|
74
|
+
|
75
|
+
def templates
|
76
|
+
{
|
77
|
+
:total => TOTAL_TEMPLATE,
|
78
|
+
:precision => SCORE_TEMPLATE,
|
79
|
+
:grouped_words => {:exact => {}, :matched => {}}
|
80
|
+
}
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def total_frequency_by(words)
|
85
|
+
measure.frequency(:template => :total, :search_words => words)
|
42
86
|
end
|
43
87
|
|
88
|
+
def words_frequency_by(words)
|
89
|
+
measure.frequency(:template => :grouped_words, :search_words => words)
|
90
|
+
end
|
91
|
+
|
92
|
+
def match_and_score_by(words)
|
93
|
+
measure.frequency(:template => :precision, :search_words => words)
|
94
|
+
end
|
95
|
+
|
96
|
+
private
|
97
|
+
|
98
|
+
def measure
|
99
|
+
MATCHRANK.new(self)
|
100
|
+
end
|
44
101
|
end
|
45
102
|
end
|
46
103
|
|
@@ -0,0 +1 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/regex_me/regex_me'
|
@@ -0,0 +1,99 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/configurations'
|
2
|
+
require File.dirname(__FILE__) + '/helpers'
|
3
|
+
|
4
|
+
module RegexMe
|
5
|
+
module Builders
|
6
|
+
include RegexMe::Configurations::Builder
|
7
|
+
|
8
|
+
def regex_me_to_search_ruby(options = {})
|
9
|
+
regex_me_to_search(:ruby, options)
|
10
|
+
end
|
11
|
+
|
12
|
+
def regex_me_to_search_mysql(options = {})
|
13
|
+
regex_me_to_search(:mysql, options)
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def options_handler(options)
|
19
|
+
handled = \
|
20
|
+
{:case_insensitive => (options[:case_insensitive] ? Regexp::IGNORECASE : nil ),
|
21
|
+
:multiline => (options[:multiline] ? Regexp::MULTILINE : nil ),
|
22
|
+
:or => (options[:or] == false ? false : true)}
|
23
|
+
|
24
|
+
return options.merge(handled)
|
25
|
+
end
|
26
|
+
|
27
|
+
def regex_me_to_search(env, options)
|
28
|
+
return EMPTYs[env] if self.strip.empty?
|
29
|
+
|
30
|
+
execute_builder(env, options)
|
31
|
+
end
|
32
|
+
|
33
|
+
# Builds the search expression for the receiver in the requested environment.
#
# env     - :ruby or :mysql.
# options - Hash of search options; it is normalised by options_handler
#           before being handed to builder.
#
# Returns a Regexp for :ruby, the pattern produced by builder for :mysql,
# and nil for any other env.
def execute_builder(env, options)
  opt_handled = options_handler(options)
  builder_result = builder(env, opt_handled)

  case env
  when :ruby
    # Regexp.new expects a single integer of OR-ed flags as its second
    # argument. Passing the flags as separate positional arguments puts
    # MULTILINE into the third parameter, where it is not treated as a flag.
    flags = [opt_handled[:case_insensitive], opt_handled[:multiline]]
            .compact
            .inject(0) { |combined, flag| combined | flag }

    Regexp.new(builder_result, flags)
  when :mysql
    builder_result
  end
end
|
46
|
+
|
47
|
+
def builder(border_to, options)
|
48
|
+
string = self
|
49
|
+
|
50
|
+
lcv = options[:latin_chars_variations]
|
51
|
+
|
52
|
+
if options[:exact_phrase]
|
53
|
+
@regexp = \
|
54
|
+
string \
|
55
|
+
.strip.simple_space \
|
56
|
+
.regex_latin_ci_list \
|
57
|
+
.gsub(/\s/, WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES) \
|
58
|
+
.regex_builder(:or => false,
|
59
|
+
:border => {:to => border_to,
|
60
|
+
:direction => :both})
|
61
|
+
else
|
62
|
+
@regexp = '('
|
63
|
+
|
64
|
+
for word in string.strip.split
|
65
|
+
if options[:exact_word]
|
66
|
+
@regexp << word.regex_builder(:border => {:to => border_to, :direction => :both}, :latin_chars_variations => lcv, :or => true)
|
67
|
+
elsif have_the_any_char?(word)
|
68
|
+
@regexp << word.regex_builder(:any => true, :border => border(border_to, word) , :latin_chars_variations => lcv, :or => true)
|
69
|
+
else
|
70
|
+
@regexp << word.regex_builder(:latin_chars_variations => lcv, :or => true)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
@regexp = (@regexp << ')').sub!(/\|\)/,')')
|
75
|
+
end
|
76
|
+
|
77
|
+
return @regexp
|
78
|
+
end
|
79
|
+
|
80
|
+
def have_the_any_char?(string)
|
81
|
+
string.include?('*')
|
82
|
+
end
|
83
|
+
|
84
|
+
def border(to, word)
|
85
|
+
direction = nil
|
86
|
+
|
87
|
+
case word
|
88
|
+
when/^\*/
|
89
|
+
direction = :right
|
90
|
+
when /\*$/
|
91
|
+
direction = :left
|
92
|
+
when /^.*\*.*$/
|
93
|
+
direction = :both
|
94
|
+
end
|
95
|
+
|
96
|
+
{:to => to, :direction => direction}
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module StringUtilityBelt
|
4
|
+
module RegexMe
|
5
|
+
module Helper
|
6
|
+
A_VARIATIONS = "(a|à|á|â|ã|ä)"
|
7
|
+
E_VARIATIONS = "(e|è|é|ê|ë)"
|
8
|
+
I_VARIATIONS = "(i|ì|í|î|ï)"
|
9
|
+
O_VARIATIONS = "(o|ò|ó|ô|õ|ö)"
|
10
|
+
U_VARIATIONS = "(u|ù|ú|û|ü)"
|
11
|
+
C_VARIATIONS = "(c|ç)"
|
12
|
+
N_VARIATIONS = "(n|ñ)"
|
13
|
+
|
14
|
+
LATIN_CHARS_VARIATIONS = [A_VARIATIONS,
|
15
|
+
E_VARIATIONS,
|
16
|
+
I_VARIATIONS,
|
17
|
+
O_VARIATIONS,
|
18
|
+
U_VARIATIONS,
|
19
|
+
C_VARIATIONS,
|
20
|
+
N_VARIATIONS]
|
21
|
+
|
22
|
+
BORDER_TO = {
|
23
|
+
:ruby => {:left => '\b', :right => '\b' },
|
24
|
+
:mysql => {:left => '[[:<:]]', :right => '[[:>:]]' }
|
25
|
+
}
|
26
|
+
|
27
|
+
# Rewrites the receiver in place, replacing each character that matches one
# of the LATIN_CHARS_VARIATIONS groups with the text of that whole group
# (for example "a" becomes "(a|à|á|â|ã|ä)"). Matching is case-insensitive, so
# an uppercase letter is replaced by the same group. Characters that match no
# group are kept unchanged.
#
# Returns the receiver after the in-place replacement.
def regex_latin_ci_list
  # Compile every group once per call instead of once per character.
  patterns = LATIN_CHARS_VARIATIONS.map do |variations|
    [variations, Regexp.new(variations, Regexp::IGNORECASE)]
  end

  memo = ""

  each_char do |char|
    # The first group whose pattern matches wins, as in the list order.
    variations, _pattern = patterns.find { |_group, pattern| char =~ pattern }
    memo << (variations || char)
  end

  replace(memo)
end
|
48
|
+
|
49
|
+
def regex_builder(options)
|
50
|
+
if options[:any]
|
51
|
+
replace_the_any_char_per_any_pattern
|
52
|
+
end
|
53
|
+
|
54
|
+
if options[:latin_chars_variations]
|
55
|
+
replace_chars_includeds_in_latin_variation_list
|
56
|
+
end
|
57
|
+
|
58
|
+
if options[:border]
|
59
|
+
insert_border(options[:border])
|
60
|
+
end
|
61
|
+
|
62
|
+
if options[:or]
|
63
|
+
insert_OR
|
64
|
+
end
|
65
|
+
|
66
|
+
return self
|
67
|
+
end
|
68
|
+
|
69
|
+
private
|
70
|
+
def replace_the_any_char_per_any_pattern
|
71
|
+
self.gsub!(/\*/, '.*')
|
72
|
+
end
|
73
|
+
|
74
|
+
def replace_chars_includeds_in_latin_variation_list
|
75
|
+
self.regex_latin_ci_list
|
76
|
+
end
|
77
|
+
|
78
|
+
def insert_border(options)
|
79
|
+
border = BORDER_TO[options[:to]]
|
80
|
+
|
81
|
+
case options[:direction]
|
82
|
+
when :left
|
83
|
+
self.insert(0, border[:left])
|
84
|
+
when :right
|
85
|
+
self.insert(-1, border[:right])
|
86
|
+
when :both
|
87
|
+
self.insert(0, border[:left]).insert(-1, border[:right])
|
88
|
+
else
|
89
|
+
self
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def insert_OR
|
94
|
+
self.insert(-1, "|")
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
class String
|
101
|
+
include StringUtilityBelt::RegexMe::Helper
|
102
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_utility_belt
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
-
|
10
|
-
version: 0.3.
|
9
|
+
- 1
|
10
|
+
version: 0.3.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Rodrigo Serradura
|
@@ -15,10 +15,25 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-07-
|
19
|
-
dependencies:
|
20
|
-
|
21
|
-
|
18
|
+
date: 2011-07-12 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: htmlentities
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - "="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 51
|
29
|
+
segments:
|
30
|
+
- 4
|
31
|
+
- 3
|
32
|
+
- 0
|
33
|
+
version: 4.3.0
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
description: Adds new features for String objects.
|
22
37
|
email:
|
23
38
|
- rserradura@gmail.com
|
24
39
|
executables: []
|
@@ -37,8 +52,11 @@ files:
|
|
37
52
|
- lib/string_utility_belt/entities.rb
|
38
53
|
- lib/string_utility_belt/general.rb
|
39
54
|
- lib/string_utility_belt/match_rank.rb
|
40
|
-
- lib/string_utility_belt/
|
41
|
-
- lib/string_utility_belt/
|
55
|
+
- lib/string_utility_belt/regex_me.rb
|
56
|
+
- lib/string_utility_belt/regex_me/lib/builders.rb
|
57
|
+
- lib/string_utility_belt/regex_me/lib/configurations.rb
|
58
|
+
- lib/string_utility_belt/regex_me/lib/helpers.rb
|
59
|
+
- lib/string_utility_belt/regex_me/regex_me.rb
|
42
60
|
- lib/string_utility_belt/tags.rb
|
43
61
|
- lib/string_utility_belt/version.rb
|
44
62
|
- test/string_utility_belt/entities_test.rb
|
@@ -80,6 +98,6 @@ rubyforge_project: string_utility_belt
|
|
80
98
|
rubygems_version: 1.8.2
|
81
99
|
signing_key:
|
82
100
|
specification_version: 3
|
83
|
-
summary:
|
101
|
+
summary: Useful methods to handle strings
|
84
102
|
test_files: []
|
85
103
|
|
@@ -1,100 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
|
3
|
-
module RegexMe
|
4
|
-
module Helper
|
5
|
-
A_VARIATIONS = "(a|à|á|â|ã|ä)"
|
6
|
-
E_VARIATIONS = "(e|è|é|ê|ë)"
|
7
|
-
I_VARIATIONS = "(i|ì|í|î|ï)"
|
8
|
-
O_VARIATIONS = "(o|ò|ó|ô|õ|ö)"
|
9
|
-
U_VARIATIONS = "(u|ù|ú|û|ü)"
|
10
|
-
C_VARIATIONS = "(c|ç)"
|
11
|
-
N_VARIATIONS = "(n|ñ)"
|
12
|
-
|
13
|
-
LATIN_CHARS_VARIATIONS = [A_VARIATIONS,
|
14
|
-
E_VARIATIONS,
|
15
|
-
I_VARIATIONS,
|
16
|
-
O_VARIATIONS,
|
17
|
-
U_VARIATIONS,
|
18
|
-
C_VARIATIONS,
|
19
|
-
N_VARIATIONS]
|
20
|
-
|
21
|
-
BORDER_TO = {
|
22
|
-
:ruby => {:left => '\b', :right => '\b' },
|
23
|
-
:mysql => {:left => '[[:<:]]', :right => '[[:>:]]' }
|
24
|
-
}
|
25
|
-
|
26
|
-
def regex_latin_ci_list
|
27
|
-
memo = ""
|
28
|
-
|
29
|
-
self.each_char do |char|
|
30
|
-
changed = false
|
31
|
-
|
32
|
-
for variations in LATIN_CHARS_VARIATIONS
|
33
|
-
variations_pattern = Regexp.new(variations, Regexp::IGNORECASE)
|
34
|
-
|
35
|
-
if char =~ variations_pattern
|
36
|
-
changed = true
|
37
|
-
memo.insert(-1, variations)
|
38
|
-
break
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
memo.insert(-1, char) unless changed
|
43
|
-
end
|
44
|
-
|
45
|
-
self.replace(memo)
|
46
|
-
end
|
47
|
-
|
48
|
-
def regex_builder(options)
|
49
|
-
if options[:any]
|
50
|
-
replace_the_any_char_per_any_pattern
|
51
|
-
end
|
52
|
-
|
53
|
-
if options[:latin_chars_variations]
|
54
|
-
replace_chars_includeds_in_latin_variation_list
|
55
|
-
end
|
56
|
-
|
57
|
-
if options[:border]
|
58
|
-
insert_border(options[:border])
|
59
|
-
end
|
60
|
-
|
61
|
-
if options[:or]
|
62
|
-
insert_OR
|
63
|
-
end
|
64
|
-
|
65
|
-
return self
|
66
|
-
end
|
67
|
-
|
68
|
-
private
|
69
|
-
def replace_the_any_char_per_any_pattern
|
70
|
-
self.gsub!(/\*/, '.*')
|
71
|
-
end
|
72
|
-
|
73
|
-
def replace_chars_includeds_in_latin_variation_list
|
74
|
-
self.regex_latin_ci_list
|
75
|
-
end
|
76
|
-
|
77
|
-
def insert_border(options)
|
78
|
-
border = BORDER_TO[options[:to]]
|
79
|
-
|
80
|
-
case options[:direction]
|
81
|
-
when :left
|
82
|
-
self.insert(0, border[:left])
|
83
|
-
when :right
|
84
|
-
self.insert(-1, border[:right])
|
85
|
-
when :both
|
86
|
-
self.insert(0, border[:left]).insert(-1, border[:right])
|
87
|
-
else
|
88
|
-
self
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
def insert_OR
|
93
|
-
self.insert(-1, "|")
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
class String
|
99
|
-
include RegexMe::Helper
|
100
|
-
end
|
@@ -1,107 +0,0 @@
|
|
1
|
-
require 'string_utility_belt/regex_me_helper'
|
2
|
-
|
3
|
-
module StringUtilityBelt
|
4
|
-
module RegexMe
|
5
|
-
EMPTYs = {:ruby => //, :mysql => ''}
|
6
|
-
WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES = '[^0-9a-zA-Z\_]+'
|
7
|
-
|
8
|
-
module To
|
9
|
-
module Search
|
10
|
-
def regex_me_to_search_ruby(options = {})
|
11
|
-
regex_me_to_search(:ruby, options)
|
12
|
-
end
|
13
|
-
|
14
|
-
def regex_me_to_search_mysql(options = {})
|
15
|
-
regex_me_to_search(:mysql, options)
|
16
|
-
end
|
17
|
-
|
18
|
-
private
|
19
|
-
|
20
|
-
def options_handler(options)
|
21
|
-
handled = \
|
22
|
-
{:case_insensitive => (options[:case_insensitive] ? Regexp::IGNORECASE : nil ),
|
23
|
-
:multiline => (options[:multiline] ? Regexp::MULTILINE : nil ),
|
24
|
-
:or => (options[:or] == false ? false : true)}
|
25
|
-
|
26
|
-
return options.merge(handled)
|
27
|
-
end
|
28
|
-
|
29
|
-
def regex_me_to_search(env, options)
|
30
|
-
return EMPTYs[env] if self.strip.empty?
|
31
|
-
|
32
|
-
execute_builder(env, options)
|
33
|
-
end
|
34
|
-
|
35
|
-
def execute_builder(env, options)
|
36
|
-
opt_handled = options_handler(options)
|
37
|
-
|
38
|
-
builder_result = builder(env, opt_handled)
|
39
|
-
|
40
|
-
case env
|
41
|
-
when :ruby
|
42
|
-
options = [opt_handled[:case_insensitive], opt_handled[:multiline]].compact
|
43
|
-
Regexp.new(builder_result, *options)
|
44
|
-
when :mysql
|
45
|
-
builder_result
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def builder(border_to, options)
|
50
|
-
string = self
|
51
|
-
|
52
|
-
lcv = options[:latin_chars_variations]
|
53
|
-
|
54
|
-
if options[:exact_phrase]
|
55
|
-
@regexp = \
|
56
|
-
string \
|
57
|
-
.strip.simple_space \
|
58
|
-
.regex_latin_ci_list \
|
59
|
-
.gsub(/\s/, WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES) \
|
60
|
-
.regex_builder(:or => false,
|
61
|
-
:border => {:to => border_to,
|
62
|
-
:direction => :both})
|
63
|
-
else
|
64
|
-
@regexp = '('
|
65
|
-
|
66
|
-
for word in string.strip.split
|
67
|
-
if options[:exact_word]
|
68
|
-
@regexp << word.regex_builder(:border => {:to => border_to, :direction => :both}, :latin_chars_variations => lcv, :or => true)
|
69
|
-
elsif have_the_any_char?(word)
|
70
|
-
@regexp << word.regex_builder(:any => true, :border => border(border_to, word) , :latin_chars_variations => lcv, :or => true)
|
71
|
-
else
|
72
|
-
@regexp << word.regex_builder(:latin_chars_variations => lcv, :or => true)
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
@regexp = (@regexp << ')').sub!(/\|\)/,')')
|
77
|
-
end
|
78
|
-
|
79
|
-
return @regexp
|
80
|
-
end
|
81
|
-
|
82
|
-
def have_the_any_char?(string)
|
83
|
-
string.include?('*')
|
84
|
-
end
|
85
|
-
|
86
|
-
def border(to, word)
|
87
|
-
direction = nil
|
88
|
-
|
89
|
-
case word
|
90
|
-
when/^\*/
|
91
|
-
direction = :right
|
92
|
-
when /\*$/
|
93
|
-
direction = :left
|
94
|
-
when /^.*\*.*$/
|
95
|
-
direction = :both
|
96
|
-
end
|
97
|
-
|
98
|
-
{:to => to, :direction => direction}
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
class String
|
106
|
-
include StringUtilityBelt::RegexMe::To::Search
|
107
|
-
end
|