string_utility_belt 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +6 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +28 -0
- data/README.markdown +145 -0
- data/Rakefile +41 -33
- data/lib/string_utility_belt/entities.rb +23 -0
- data/lib/string_utility_belt/general.rb +72 -0
- data/lib/{match_rank → string_utility_belt}/match_rank.rb +30 -24
- data/lib/string_utility_belt/regex_me_helper.rb +100 -0
- data/lib/string_utility_belt/regex_me_to_search.rb +107 -0
- data/lib/string_utility_belt/tags.rb +22 -0
- data/lib/string_utility_belt/version.rb +1 -6
- data/lib/string_utility_belt.rb +6 -17
- data/test/string_utility_belt/entities_test.rb +17 -0
- data/test/string_utility_belt/general_test.rb +73 -0
- data/test/string_utility_belt/match_rank_test.rb +64 -0
- data/test/string_utility_belt/regex_me_helper_test.rb +117 -0
- data/test/string_utility_belt/regex_me_to_search_test.rb +106 -0
- data/test/string_utility_belt/tags_test.rb +25 -0
- data/test/test_helper.rb +5 -0
- metadata +30 -22
- data/lib/general/general.rb +0 -36
- data/lib/html_and_aml/helpers/entities.rb +0 -16
- data/lib/html_and_aml/helpers/tags.rb +0 -13
- data/lib/html_and_aml/html_and_aml.rb +0 -10
- data/lib/regex_me/helpers/string/regex_me.rb +0 -73
- data/lib/regex_me/regex_me.rb +0 -84
- data/string_utility_belt.gemspec +0 -10
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
string_utility_belt (0.2.3)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
columnize (0.3.3)
|
10
|
+
htmlentities (4.3.0)
|
11
|
+
linecache (0.46)
|
12
|
+
rbx-require-relative (> 0.0.4)
|
13
|
+
rbx-require-relative (0.0.5)
|
14
|
+
rcov (0.9.9)
|
15
|
+
ruby-debug (0.10.4)
|
16
|
+
columnize (>= 0.1)
|
17
|
+
ruby-debug-base (~> 0.10.4.0)
|
18
|
+
ruby-debug-base (0.10.4)
|
19
|
+
linecache (>= 0.3)
|
20
|
+
|
21
|
+
PLATFORMS
|
22
|
+
ruby
|
23
|
+
|
24
|
+
DEPENDENCIES
|
25
|
+
htmlentities (= 4.3.0)
|
26
|
+
rcov
|
27
|
+
ruby-debug
|
28
|
+
string_utility_belt!
|
data/README.markdown
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
# serradura-string_utility_belt
|
2
|
+
|
3
|
+
## Links
|
4
|
+
|
5
|
+
<a href='http://rubygems.org/gems/string_utility_belt'>http://rubygems.org/gems/string_utility_belt</a>
|
6
|
+
|
7
|
+
<a href="http://github.com/serradura/string_utility_belt">http://github.com/serradura/string_utility_belt</a>
|
8
|
+
|
9
|
+
## Install
|
10
|
+
|
11
|
+
gem install string_utility_belt
|
12
|
+
|
13
|
+
## Let's code!
|
14
|
+
|
15
|
+
Pessoal,
|
16
|
+
Peguei um amontoado de métodos que andei desenvolvendo para strings e surgiu essa gem!
|
17
|
+
|
18
|
+
Dentre os módulos que ela tem, está o que transforma a String em uma Regex; o nome do módulo é RegexMe! :p
|
19
|
+
|
20
|
+
Segue alguns exemplos:
|
21
|
+
|
22
|
+
>> require "string_utility_belt"
|
23
|
+
>> "coca cola".regex_me_to_search_ruby
|
24
|
+
=> /(coca|cola)/
|
25
|
+
|
26
|
+
Qual a utilidade disso???
|
27
|
+
|
28
|
+
Imagine que você tem a seguinte coleção:
|
29
|
+
|
30
|
+
minha_colecao = %w{carro caminhão moto lancha avião banana bonono benene}
|
31
|
+
|
32
|
+
E você quer selecionar as palavras que contenham: car e mo
|
33
|
+
|
34
|
+
minha_colecao.select { |item| item =~ "car mo".regex_me_to_search_ruby }
|
35
|
+
#=> ["carro", "moto"]
|
36
|
+
|
37
|
+
Mas e se você quiser as palavras que termine com a letra "a" e que case com b*n*n*
|
38
|
+
|
39
|
+
minha_colecao.select { |item| item =~ "*a b*n*n*".regex_me_to_search_ruby }
|
40
|
+
#=> ["lancha", "banana", "bonono", "benene"]
|
41
|
+
|
42
|
+
Vamos tentar só com o que termina com a letra "a"
|
43
|
+
|
44
|
+
minha_colecao.select { |item| item =~ "*a".regex_me_to_search_ruby }
|
45
|
+
#=> ["lancha", "banana"]
|
46
|
+
|
47
|
+
Que comece com a letra "m"
|
48
|
+
|
49
|
+
minha_colecao.select { |item| item =~ "m*".regex_me_to_search_ruby }
|
50
|
+
#=> ["moto"]
|
51
|
+
|
52
|
+
Uaaaaauuuu, entenderam as possibilidades?
|
53
|
+
Você também pode fazer:
|
54
|
+
|
55
|
+
minha_colecao.select { |item| item =~ "m* car *a b*n*".regex_me_to_search_ruby }
|
56
|
+
#=> ["carro", "moto", "lancha", "banana", "bonono", "benene"]
|
57
|
+
|
58
|
+
Você também pode passar alguns parâmetros para criar Regex mais inteligentes:
|
59
|
+
|
60
|
+
Regexs que ignoram case sensitive
|
61
|
+
|
62
|
+
minha_colecao.select { |item| item =~ "N".regex_me_to_search_ruby(:case_insensitive => true) }
|
63
|
+
#=> ["caminhão", "lancha", "banana", "bonono", "benene"]
|
64
|
+
|
65
|
+
Que case palavras exatas independente se é maiúscula ou minúscula
|
66
|
+
|
67
|
+
["Ruby Rails", "Ruby on Rails", "Ruby - Rails"].select { |item| item =~ "ruby rails".regex_me_to_search_ruby(:case_insensitive => true, :exact_phrase => true) }
|
68
|
+
#=> ["Ruby Rails", "Ruby - Rails"]
|
69
|
+
|
70
|
+
O parâmetro :exact_phrase ignora qualquer caracter do tipo
|
71
|
+
letra (maiúscula ou minúscula) e números além do char "_"
|
72
|
+
me baseei na regra utilizada pelo twitter e google quando usamos "
|
73
|
+
|
74
|
+
Ex: "Ruby Rails"
|
75
|
+
|
76
|
+
Que tenha palavras exatas
|
77
|
+
|
78
|
+
minha_colecao.select { |item| item =~ "car".regex_me_to_search_ruby(:exact_word => true) }
|
79
|
+
#=> []
|
80
|
+
|
81
|
+
Se eu quiser que palavras que contenham car
|
82
|
+
|
83
|
+
minha_colecao.select { |item| item =~ "car".regex_me_to_search_ruby }
|
84
|
+
#=> ["carro"]
|
85
|
+
|
86
|
+
Agora vamos supor que o usuário queira casar a palavra estágio
|
87
|
+
mas nos textos que ele estava buscando a palavra ele percebeu que existia palavra com e sem acentuação
|
88
|
+
|
89
|
+
palavras = %w{estagio estágio éstágio estagió estagios}
|
90
|
+
|
91
|
+
palavras.select { |palavra| palavra =~ "estágio".regex_me_to_search_ruby }
|
92
|
+
#=> ["estágio"]
|
93
|
+
|
94
|
+
E agora e se eu quiser casar as palavras independente da acentuação???
|
95
|
+
|
96
|
+
palavras.select { |palavra| palavra =~ "estágio".regex_me_to_search_ruby(:latin_chars_variations => true)}
|
97
|
+
#=> ["estagio", "estágio", "éstágio", "estagió", "estagios"]
|
98
|
+
|
99
|
+
Mas se eu quiser apenas estágio, e ignorar estagios por exemplo???
|
100
|
+
|
101
|
+
palavras.select { |palavra| palavra =~ "estágio".regex_me_to_search_ruby(:latin_chars_variations => true, :exact_word => true)}
|
102
|
+
#=> ["estagio", "estágio", "éstágio", "estagió"]
|
103
|
+
|
104
|
+
|
105
|
+
Mas saindo um pouco do ruby e pensando numa aplicação em rails.
|
106
|
+
Imagine que você tem um campo de busca e o usuário pode escrever várias palavras no campo (Igual ao google) e o resultado deverá retornar as palavras que contenham o que o usuário digitou.
|
107
|
+
|
108
|
+
Imagine que você tá no controller (Embora essa lógica deveria estar no model! :D)
|
109
|
+
|
110
|
+
@textos = Texto.all(:conditions => ["texto REGEXP ?", params[:busca].regex_me_to_search_mysql])
|
111
|
+
|
112
|
+
Por enquanto as regex estão prontas para o mysql e você pode utilizar todos os parâmetros que foram apresentados acima!
|
113
|
+
|
114
|
+
Perceberam os ganhos que tivemos na aplicação???
|
115
|
+
|
116
|
+
O usuário pode escrever no form:
|
117
|
+
|
118
|
+
car* *a c*r*
|
119
|
+
|
120
|
+
Só com isso você já dá ao usuário o poder de fazer pesquisas mais inteligentes e você só utiliza um método!
|
121
|
+
|
122
|
+
Não sei se devo.... Mas muitos programadores fazem buscas dinâmicas utilizando o operador LIKE e sai essas bizarrices.
|
123
|
+
|
124
|
+
SELECT * FROM TEXTOS
|
125
|
+
WHERE texto LIKE "%CARRO%"
|
126
|
+
OR texto LIKE "%MOTO%"
|
127
|
+
OR texto LIKE "%AVIAO%"
|
128
|
+
|
129
|
+
Já com o <b>string_utility_belt</b> ele vai gerar.
|
130
|
+
|
131
|
+
SELECT * FROM TEXTOS WHERE texto REGEXP "(CARRO|MOTO|AVIAO)"
|
132
|
+
|
133
|
+
Que é muito mais inteligente e poderoso!!!
|
134
|
+
|
135
|
+
É isso!!!
|
136
|
+
|
137
|
+
A api tem outras funcionalidades bem interessantes...
|
138
|
+
Mas para começar vou documentar apenas o módulo RegexMe!
|
139
|
+
|
140
|
+
E caso você queira colaborar...
|
141
|
+
Dê um fork no projeto envie seus códigos e publicarei na GEM.
|
142
|
+
|
143
|
+
Abraço,
|
144
|
+
Serradura
|
145
|
+
|
data/Rakefile
CHANGED
@@ -1,35 +1,19 @@
|
|
1
|
-
|
2
|
-
# To change this template, choose Tools | Templates
|
3
|
-
# and open the template in the editor.
|
4
|
-
|
5
|
-
|
6
|
-
require 'rubygems'
|
1
|
+
require 'rubygems' if RUBY_VERSION < '1.9'
|
7
2
|
require 'rake'
|
8
3
|
require 'rake/clean'
|
9
|
-
require 'rake/gempackagetask'
|
10
|
-
require 'rake/rdoctask'
|
11
4
|
require 'rake/testtask'
|
12
|
-
require '
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
s.extra_rdoc_files = ['README', 'LICENSE']
|
19
|
-
s.summary = 'Your summary here'
|
20
|
-
s.description = s.summary
|
21
|
-
s.author = ''
|
22
|
-
s.email = ''
|
23
|
-
# s.executables = ['your_executable_here']
|
24
|
-
s.files = %w(LICENSE README Rakefile) + Dir.glob("{bin,lib,spec}/**/*")
|
25
|
-
s.require_path = "lib"
|
26
|
-
s.bindir = "bin"
|
5
|
+
require 'bundler/gem_tasks'
|
6
|
+
|
7
|
+
# Load the RDoc rake task, falling back to the newer location when the
# legacy 'rake/rdoctask' file is not available.
begin
  require 'rake/rdoctask'
rescue LoadError
  # A bare `rescue` only catches StandardError; a failed `require` raises
  # LoadError (a ScriptError), so it has to be named explicitly.
  require 'rdoc/task'
end
|
28
12
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
13
|
+
# Load the rcov rake task, trying the alternative file name when the first
# one cannot be found.
begin
  require 'rcov/rcovtask'
rescue LoadError
  # A bare `rescue` only catches StandardError; a failed `require` raises
  # LoadError (a ScriptError), so it has to be named explicitly.
  require 'rcov/task'
end
|
34
18
|
|
35
19
|
Rake::RDocTask.new do |rdoc|
|
@@ -41,11 +25,35 @@ Rake::RDocTask.new do |rdoc|
|
|
41
25
|
rdoc.options << '--line-numbers'
|
42
26
|
end
|
43
27
|
|
44
|
-
|
45
|
-
|
28
|
+
namespace :test do
|
29
|
+
Rake::TestTask.new do |t|
|
30
|
+
t.test_files = FileList['test/**/*.rb']
|
31
|
+
t.name = 'all'
|
32
|
+
end
|
46
33
|
end
|
47
34
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
35
|
+
# Runs rcov over the given test files and prints a text coverage report.
#
# files - Array of test file paths to execute under rcov.
#
# Removes previous coverage output first, then shells out to `rcov`.
# Prints a notice and returns without running anything when +files+ is empty.
def run_coverage(files)
  # "coverage" is a directory, which rm_f does not remove; rm_rf does.
  rm_rf "coverage"
  rm_f "coverage.data"

  if files.empty?
    puts "No files were specified for testing"
    return
  end

  # turn the files we want to run into a single command-line string
  file_list = files.join(" ")

  exclude = '--exclude "usr/*"'

  rcov = "rcov -Ilib:test --sort coverage --text-report #{exclude} --aggregate coverage.data"
  sh "#{rcov} #{file_list}"
end
|
53
|
+
|
54
|
+
namespace :test do
|
55
|
+
desc 'Measures test coverage'
|
56
|
+
task :rcov do
|
57
|
+
run_coverage Dir["test/string_utility_belt/**/*.rb"]
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'htmlentities'
|
2
|
+
|
3
|
+
module StringUtilityBelt
|
4
|
+
module Entities
|
5
|
+
CODER = HTMLEntities.new
|
6
|
+
|
7
|
+
def generate_entities
|
8
|
+
CODER.encode(self)
|
9
|
+
end
|
10
|
+
|
11
|
+
def decode_entities
|
12
|
+
CODER.decode(self)
|
13
|
+
end
|
14
|
+
|
15
|
+
def decode_entities_and_cleaner
|
16
|
+
decode_entities.tag_cleaner
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
class String
|
22
|
+
include StringUtilityBelt::Entities
|
23
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'string_utility_belt/regex_me_to_search'
|
2
|
+
|
3
|
+
module StringUtilityBelt
|
4
|
+
module General
|
5
|
+
# Helper object that checks whether a string matches every word of a list,
# using the search regexes produced by String#regex_me_to_search_ruby.
class GENERAL
  # Matching is always case-insensitive, whatever the caller passes.
  CASE_INSENSITIVE_OPT = {:case_insensitive => true}

  # Returns true when +string+ matches the pattern built from each entry of
  # +words_to_match+, false as soon as one entry does not match.
  #
  # string         - the String to inspect.
  # words_to_match - a collection of Strings, each turned into a search regex.
  # options        - true/false/nil (used as the :exact_word flag) or a Hash
  #                  of options forwarded to regex_me_to_search_ruby.
  #
  # Raises ArgumentError when +options+ is of any other type.
  def have_this_words?(string, words_to_match, options)
    @string = string
    @arguments = options

    words_to_match.each do |word|
      return false if string_does_not_match_with_this_word_pattern?(word)
    end

    true
  end

  private

  def string_does_not_match_with_this_word_pattern?(word)
    @string !~ word.regex_me_to_search_ruby(arguments)
  end

  # Normalizes the caller's options into the Hash expected by the regex
  # builder. Hash subclasses are accepted, and nil is treated like false,
  # instead of silently producing nil options.
  def arguments
    case @arguments
    when Hash
      @arguments.merge(CASE_INSENSITIVE_OPT)
    when true, false, nil
      CASE_INSENSITIVE_OPT.merge({:exact_word => (@arguments ? true : false)})
    else
      raise ArgumentError,
            "options must be a Hash, true, false or nil (got #{@arguments.inspect})"
    end
  end
end
|
40
|
+
|
41
|
+
WORD_PATTERN = /\w[\w\'\-]*/
|
42
|
+
ANY_SPACE_PATTERN = /\s+/
|
43
|
+
|
44
|
+
SIMPLE_SPACE = " "
|
45
|
+
|
46
|
+
def words
|
47
|
+
self.scan(WORD_PATTERN)
|
48
|
+
end
|
49
|
+
|
50
|
+
def simple_space
|
51
|
+
self.gsub(ANY_SPACE_PATTERN, SIMPLE_SPACE)
|
52
|
+
end
|
53
|
+
|
54
|
+
def simple_space!
|
55
|
+
self.gsub!(ANY_SPACE_PATTERN, SIMPLE_SPACE)
|
56
|
+
end
|
57
|
+
|
58
|
+
def have_this_words?(words_to_match, options = false)
|
59
|
+
i = GENERAL.new
|
60
|
+
i.have_this_words?(self, words_to_match, options)
|
61
|
+
end
|
62
|
+
|
63
|
+
def not_have_this_words?(words_to_match, options = false)
|
64
|
+
i = GENERAL.new
|
65
|
+
!i.have_this_words?(self, words_to_match, options)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
class String
|
71
|
+
include StringUtilityBelt::General
|
72
|
+
end
|
@@ -1,16 +1,33 @@
|
|
1
|
+
require 'string_utility_belt/regex_me_to_search'
|
1
2
|
|
2
|
-
module
|
3
|
+
module StringUtilityBelt
|
4
|
+
module MatchRank
|
3
5
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
def total_frequency_by words_to_match
|
7
|
+
frequency_by(words_to_match, 0, 0) do |freq, word_to_match, word|
|
8
|
+
freq[:exact] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
|
9
|
+
freq[:matched] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
|
10
|
+
end
|
11
|
+
end
|
9
12
|
|
10
|
-
|
11
|
-
|
13
|
+
def words_frequency_by words_to_match
|
14
|
+
frequency_by(words_to_match, Hash.new(0), Hash.new(0)) do |freq, word_to_match, word|
|
15
|
+
freq[:exact][word_to_match] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
|
16
|
+
freq[:matched][word_to_match] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# Summarizes how well the receiver matches +words_to_match+.
#
# Returns a Hash with Float values:
#   :exact     - count reported by total_frequency_by under :exact
#   :matched   - count reported by total_frequency_by under :matched
#   :precision - exact / matched as a percentage; 0.0 when nothing matched
#                (previously 0.0 / 0.0 produced NaN)
def match_and_score_by(words_to_match)
  freq = total_frequency_by(words_to_match)

  exact = freq[:exact].to_f
  matched = freq[:matched].to_f
  precision = matched.zero? ? 0.0 : (exact / matched) * 100

  {:exact => exact, :matched => matched, :precision => precision}
end
|
28
|
+
|
29
|
+
private
|
12
30
|
|
13
|
-
private
|
14
31
|
def frequency_by words_to_match, frequency_object_a, frequency_object_b
|
15
32
|
self_words = self.words
|
16
33
|
freq = {:exact => frequency_object_a, :matched => frequency_object_b}
|
@@ -24,20 +41,9 @@ module MatchRank
|
|
24
41
|
return freq
|
25
42
|
end
|
26
43
|
|
27
|
-
|
28
|
-
def words_frequency_by words_to_match
|
29
|
-
frequency_by(words_to_match, Hash.new(0), Hash.new(0)) do |freq, word_to_match, word|
|
30
|
-
freq[:exact][word_to_match] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
|
31
|
-
freq[:matched][word_to_match] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def total_frequency_by words_to_match
|
36
|
-
frequency_by(words_to_match, 0, 0) do |freq, word_to_match, word|
|
37
|
-
freq[:exact] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
|
38
|
-
freq[:matched] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
44
|
+
end
|
42
45
|
end
|
43
46
|
|
47
|
+
class String
|
48
|
+
include StringUtilityBelt::MatchRank
|
49
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module RegexMe
|
4
|
+
module Helper
|
5
|
+
A_VARIATIONS = "(a|à|á|â|ã|ä)"
|
6
|
+
E_VARIATIONS = "(e|è|é|ê|ë)"
|
7
|
+
I_VARIATIONS = "(i|ì|í|î|ï)"
|
8
|
+
O_VARIATIONS = "(o|ò|ó|ô|õ|ö)"
|
9
|
+
U_VARIATIONS = "(u|ù|ú|û|ü)"
|
10
|
+
C_VARIATIONS = "(c|ç)"
|
11
|
+
N_VARIATIONS = "(n|ñ)"
|
12
|
+
|
13
|
+
LATIN_CHARS_VARIATIONS = [A_VARIATIONS,
|
14
|
+
E_VARIATIONS,
|
15
|
+
I_VARIATIONS,
|
16
|
+
O_VARIATIONS,
|
17
|
+
U_VARIATIONS,
|
18
|
+
C_VARIATIONS,
|
19
|
+
N_VARIATIONS]
|
20
|
+
|
21
|
+
BORDER_TO = {
|
22
|
+
:ruby => {:left => '\b', :right => '\b' },
|
23
|
+
:mysql => {:left => '[[:<:]]', :right => '[[:>:]]' }
|
24
|
+
}
|
25
|
+
|
26
|
+
# Replaces, in place, every character that belongs to one of the
# LATIN_CHARS_VARIATIONS groups by that whole group, e.g. "a" becomes
# "(a|à|á|â|ã|ä)". Characters outside every group are kept as they are.
# Matching against the groups is case-insensitive.
#
# Returns the receiver (mutated).
def regex_latin_ci_list
  # Compile each variation group once per call instead of once per character.
  groups = LATIN_CHARS_VARIATIONS.map do |variations|
    [variations, Regexp.new(variations, Regexp::IGNORECASE)]
  end

  expanded = ""
  each_char do |char|
    # First group whose alternation matches wins, as in the original scan.
    hit = groups.find { |(_variations, pattern)| char =~ pattern }
    expanded << (hit ? hit.first : char)
  end

  replace(expanded)
end
|
47
|
+
|
48
|
+
def regex_builder(options)
|
49
|
+
if options[:any]
|
50
|
+
replace_the_any_char_per_any_pattern
|
51
|
+
end
|
52
|
+
|
53
|
+
if options[:latin_chars_variations]
|
54
|
+
replace_chars_includeds_in_latin_variation_list
|
55
|
+
end
|
56
|
+
|
57
|
+
if options[:border]
|
58
|
+
insert_border(options[:border])
|
59
|
+
end
|
60
|
+
|
61
|
+
if options[:or]
|
62
|
+
insert_OR
|
63
|
+
end
|
64
|
+
|
65
|
+
return self
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
def replace_the_any_char_per_any_pattern
|
70
|
+
self.gsub!(/\*/, '.*')
|
71
|
+
end
|
72
|
+
|
73
|
+
def replace_chars_includeds_in_latin_variation_list
|
74
|
+
self.regex_latin_ci_list
|
75
|
+
end
|
76
|
+
|
77
|
+
def insert_border(options)
|
78
|
+
border = BORDER_TO[options[:to]]
|
79
|
+
|
80
|
+
case options[:direction]
|
81
|
+
when :left
|
82
|
+
self.insert(0, border[:left])
|
83
|
+
when :right
|
84
|
+
self.insert(-1, border[:right])
|
85
|
+
when :both
|
86
|
+
self.insert(0, border[:left]).insert(-1, border[:right])
|
87
|
+
else
|
88
|
+
self
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def insert_OR
|
93
|
+
self.insert(-1, "|")
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
class String
|
99
|
+
include RegexMe::Helper
|
100
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'string_utility_belt/regex_me_helper'
|
2
|
+
|
3
|
+
module StringUtilityBelt
|
4
|
+
module RegexMe
|
5
|
+
EMPTYs = {:ruby => //, :mysql => ''}
|
6
|
+
WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES = '[^0-9a-zA-Z\_]+'
|
7
|
+
|
8
|
+
module To
|
9
|
+
module Search
|
10
|
+
def regex_me_to_search_ruby(options = {})
|
11
|
+
regex_me_to_search(:ruby, options)
|
12
|
+
end
|
13
|
+
|
14
|
+
def regex_me_to_search_mysql(options = {})
|
15
|
+
regex_me_to_search(:mysql, options)
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def options_handler(options)
|
21
|
+
handled = \
|
22
|
+
{:case_insensitive => (options[:case_insensitive] ? Regexp::IGNORECASE : nil ),
|
23
|
+
:multiline => (options[:multiline] ? Regexp::MULTILINE : nil ),
|
24
|
+
:or => (options[:or] == false ? false : true)}
|
25
|
+
|
26
|
+
return options.merge(handled)
|
27
|
+
end
|
28
|
+
|
29
|
+
def regex_me_to_search(env, options)
|
30
|
+
return EMPTYs[env] if self.strip.empty?
|
31
|
+
|
32
|
+
execute_builder(env, options)
|
33
|
+
end
|
34
|
+
|
35
|
+
# Builds the search pattern for +env+ and wraps it in the form that
# environment expects.
#
# env     - :ruby (returns a Regexp) or :mysql (returns the pattern String).
# options - the caller's options Hash; normalized through options_handler.
#
# Returns nil for any other +env+.
def execute_builder(env, options)
  opt_handled = options_handler(options)

  builder_result = builder(env, opt_handled)

  case env
  when :ruby
    # Regexp.new takes ONE integer of OR-ed flags. Passing the flags as
    # separate positional arguments put the second one in the encoding
    # parameter, so it was never applied as a flag.
    flags = [opt_handled[:case_insensitive], opt_handled[:multiline]].compact
    Regexp.new(builder_result, flags.inject(0) { |acc, flag| acc | flag })
  when :mysql
    builder_result
  end
end
|
48
|
+
|
49
|
+
# Builds the textual search pattern for the receiver.
#
# border_to - :ruby or :mysql; selects which word-border tokens are used.
# options   - normalized options Hash. Keys read here: :exact_phrase,
#             :exact_word and :latin_chars_variations.
#
# With :exact_phrase the whole phrase becomes one bordered pattern whose
# words are separated by WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES.
# Otherwise every word becomes one alternative of a "(a|b|c)" group.
#
# The result is kept in a local variable rather than in an instance variable
# on the receiver, so frozen strings work and no state is left behind.
#
# Returns the pattern as a String.
def builder(border_to, options)
  lcv = options[:latin_chars_variations]

  if options[:exact_phrase]
    # strip/simple_space return copies, so the in-place helpers below never
    # touch the receiver itself.
    phrase = strip.simple_space.regex_latin_ci_list
    phrase = phrase.gsub(/\s/, WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES)

    return phrase.regex_builder(:or => false,
                                :border => {:to => border_to,
                                            :direction => :both})
  end

  alternatives = '('.dup

  strip.split.each do |word|
    word_options = {:latin_chars_variations => lcv, :or => true}

    if options[:exact_word]
      word_options[:border] = {:to => border_to, :direction => :both}
    elsif have_the_any_char?(word)
      word_options[:any] = true
      word_options[:border] = border(border_to, word)
    end

    alternatives << word.regex_builder(word_options)
  end

  alternatives << ')'

  # Each word ends with "|"; drop only the last one, right before ")".
  alternatives.sub(/\|\)\z/, ')')
end
|
81
|
+
|
82
|
+
def have_the_any_char?(string)
|
83
|
+
string.include?('*')
|
84
|
+
end
|
85
|
+
|
86
|
+
def border(to, word)
|
87
|
+
direction = nil
|
88
|
+
|
89
|
+
case word
|
90
|
+
when/^\*/
|
91
|
+
direction = :right
|
92
|
+
when /\*$/
|
93
|
+
direction = :left
|
94
|
+
when /^.*\*.*$/
|
95
|
+
direction = :both
|
96
|
+
end
|
97
|
+
|
98
|
+
{:to => to, :direction => direction}
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
class String
|
106
|
+
include StringUtilityBelt::RegexMe::To::Search
|
107
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module StringUtilityBelt
|
2
|
+
module Tags
|
3
|
+
EMPTY_STR = ''
|
4
|
+
|
5
|
+
TAG_PATTERN = /<[^<]*?>/
|
6
|
+
|
7
|
+
# TAGs disponíveis até 09/2010 - FONTE: http://www.w3schools.com/tags/default.asp
|
8
|
+
ANY_HTML_TAG_PATTERN = /<\/?(a|p|abbr|acronym|address|applet|area|b|base|basefont|bdo|big|blockquote|body|br|button|caption|center|cite|code|col|colgroup|dd|del|dfn|dir|div|dl|dt|em|fieldset|font|form|frame|frameset|h6|head|hr|html|i|iframe|img|input|ins|isindex|kbd|label|legend|li|link|map|menu|meta|noframes|noscript|object)[^>]+??>/im
|
9
|
+
|
10
|
+
def tag_cleaner
|
11
|
+
self.gsub(TAG_PATTERN, EMPTY_STR)
|
12
|
+
end
|
13
|
+
|
14
|
+
def html_tag_cleaner
|
15
|
+
self.gsub(ANY_HTML_TAG_PATTERN, EMPTY_STR)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
class String
|
21
|
+
include StringUtilityBelt::Tags
|
22
|
+
end
|