string_utility_belt 0.2.5 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +28 -0
- data/README.markdown +145 -0
- data/Rakefile +41 -33
- data/lib/string_utility_belt/entities.rb +23 -0
- data/lib/string_utility_belt/general.rb +72 -0
- data/lib/{match_rank → string_utility_belt}/match_rank.rb +30 -24
- data/lib/string_utility_belt/regex_me_helper.rb +100 -0
- data/lib/string_utility_belt/regex_me_to_search.rb +107 -0
- data/lib/string_utility_belt/tags.rb +22 -0
- data/lib/string_utility_belt/version.rb +1 -6
- data/lib/string_utility_belt.rb +6 -17
- data/test/string_utility_belt/entities_test.rb +17 -0
- data/test/string_utility_belt/general_test.rb +73 -0
- data/test/string_utility_belt/match_rank_test.rb +64 -0
- data/test/string_utility_belt/regex_me_helper_test.rb +117 -0
- data/test/string_utility_belt/regex_me_to_search_test.rb +106 -0
- data/test/string_utility_belt/tags_test.rb +25 -0
- data/test/test_helper.rb +5 -0
- metadata +30 -22
- data/lib/general/general.rb +0 -36
- data/lib/html_and_aml/helpers/entities.rb +0 -16
- data/lib/html_and_aml/helpers/tags.rb +0 -13
- data/lib/html_and_aml/html_and_aml.rb +0 -10
- data/lib/regex_me/helpers/string/regex_me.rb +0 -73
- data/lib/regex_me/regex_me.rb +0 -84
- data/string_utility_belt.gemspec +0 -10
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
string_utility_belt (0.2.3)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
columnize (0.3.3)
|
10
|
+
htmlentities (4.3.0)
|
11
|
+
linecache (0.46)
|
12
|
+
rbx-require-relative (> 0.0.4)
|
13
|
+
rbx-require-relative (0.0.5)
|
14
|
+
rcov (0.9.9)
|
15
|
+
ruby-debug (0.10.4)
|
16
|
+
columnize (>= 0.1)
|
17
|
+
ruby-debug-base (~> 0.10.4.0)
|
18
|
+
ruby-debug-base (0.10.4)
|
19
|
+
linecache (>= 0.3)
|
20
|
+
|
21
|
+
PLATFORMS
|
22
|
+
ruby
|
23
|
+
|
24
|
+
DEPENDENCIES
|
25
|
+
htmlentities (= 4.3.0)
|
26
|
+
rcov
|
27
|
+
ruby-debug
|
28
|
+
string_utility_belt!
|
data/README.markdown
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
# serradura-string_utility_belt
|
2
|
+
|
3
|
+
## Links
|
4
|
+
|
5
|
+
<a href='http://rubygems.org/gems/string_utility_belt'>http://rubygems.org/gems/string_utility_belt</a>
|
6
|
+
|
7
|
+
<a href="http://github.com/serradura/string_utility_belt">http://github.com/serradura/string_utility_belt</a>
|
8
|
+
|
9
|
+
## Install
|
10
|
+
|
11
|
+
gem install string_utility_belt
|
12
|
+
|
13
|
+
## Let's code!
|
14
|
+
|
15
|
+
Pessoal,
|
16
|
+
Peguei um amontoado de métodos que andei desenvolvendo para strings e surgiu essa gem!
|
17
|
+
|
18
|
+
Dentre os módulos que ela tem, há um que transforma a String em uma Regex; o nome desse módulo é RegexMe! :p
|
19
|
+
|
20
|
+
Seguem alguns exemplos:
|
21
|
+
|
22
|
+
>> require "string_utility_belt"
|
23
|
+
>> "coca cola".regex_me_to_search_ruby
|
24
|
+
=> /(coca|cola)/
|
25
|
+
|
26
|
+
Qual a utilidade disso???
|
27
|
+
|
28
|
+
Imagine que você tem a seguinte coleção:
|
29
|
+
|
30
|
+
minha_colecao = %w{carro caminhão moto lancha avião banana bonono benene}
|
31
|
+
|
32
|
+
E você quer selecionar as palavras que contenham: car e mo
|
33
|
+
|
34
|
+
minha_colecao.select { |item| item =~ "car mo".regex_me_to_search_ruby }
|
35
|
+
#=> ["carro", "moto"]
|
36
|
+
|
37
|
+
Mas e se você quiser as palavras que terminem com a letra "a" ou que casem com b*n*n*
|
38
|
+
|
39
|
+
minha_colecao.select { |item| item =~ "*a b*n*n*".regex_me_to_search_ruby }
|
40
|
+
#=> ["lancha", "banana", "bonono", "benene"]
|
41
|
+
|
42
|
+
Vamos tentar só com o que termina com a letra "a"
|
43
|
+
|
44
|
+
minha_colecao.select { |item| item =~ "*a".regex_me_to_search_ruby }
|
45
|
+
#=> ["lancha", "banana"]
|
46
|
+
|
47
|
+
Que comece com a letra "m"
|
48
|
+
|
49
|
+
minha_colecao.select { |item| item =~ "m*".regex_me_to_search_ruby }
|
50
|
+
#=> ["moto"]
|
51
|
+
|
52
|
+
Uaaaaauuuu, entenderam as possibilidades?
|
53
|
+
Você também pode fazer:
|
54
|
+
|
55
|
+
minha_colecao.select { |item| item =~ "m* car *a b*n*".regex_me_to_search_ruby }
|
56
|
+
#=> ["carro", "moto", "lancha", "banana", "bonono", "benene"]
|
57
|
+
|
58
|
+
Você também pode passar alguns parâmetros para criar Regex mais inteligentes:
|
59
|
+
|
60
|
+
Regex que ignoram a diferença entre maiúsculas e minúsculas (case insensitive)
|
61
|
+
|
62
|
+
minha_colecao.select { |item| item =~ "N".regex_me_to_search_ruby(:case_insensitive => true) }
|
63
|
+
#=> ["caminhão", "lancha", "banana", "bonono", "benene"]
|
64
|
+
|
65
|
+
Que case frases exatas, independente de serem maiúsculas ou minúsculas
|
66
|
+
|
67
|
+
["Ruby Rails", "Ruby on Rails", "Ruby - Rails"].select { |item| item =~ "ruby rails".regex_me_to_search_ruby(:case_insensitive => true, :exact_phrase => true) }
|
68
|
+
#=> ["Ruby Rails", "Ruby - Rails"]
|
69
|
+
|
70
|
+
O parâmetro :exact_phrase ignora, entre as palavras, qualquer caractere que não seja
|
71
|
+
letra (maiúscula ou minúscula), número ou o char "_";
|
72
|
+
me baseei na regra utilizada pelo Twitter e pelo Google quando usamos aspas (").
|
73
|
+
|
74
|
+
Ex: "Ruby Rails"
|
75
|
+
|
76
|
+
Que tenha palavras exatas
|
77
|
+
|
78
|
+
minha_colecao.select { |item| item =~ "car".regex_me_to_search_ruby(:exact_word => true) }
|
79
|
+
#=> []
|
80
|
+
|
81
|
+
Se eu quiser as palavras que contenham car
|
82
|
+
|
83
|
+
minha_colecao.select { |item| item =~ "car".regex_me_to_search_ruby }
|
84
|
+
#=> ["carro"]
|
85
|
+
|
86
|
+
Agora vamos supor que o usuário queira casar a palavra estágio
|
87
|
+
mas nos textos que ele estava buscando a palavra ele percebeu que existia palavra com e sem acentuação
|
88
|
+
|
89
|
+
palavras = %w{estagio estágio éstágio estagió estagios}
|
90
|
+
|
91
|
+
palavras.select { |palavra| palavra =~ "estágio".regex_me_to_search_ruby }
|
92
|
+
#=> ["estágio"]
|
93
|
+
|
94
|
+
E agora e se eu quiser casar as palavras independente da acentuação???
|
95
|
+
|
96
|
+
palavras.select { |palavra| palavra =~ "estágio".regex_me_to_search_ruby(:latin_chars_variations => true)}
|
97
|
+
#=> ["estagio", "estágio", "éstágio", "estagió", "estagios"]
|
98
|
+
|
99
|
+
Mas se eu quiser apenas estágio, e ignorar estagios por exemplo???
|
100
|
+
|
101
|
+
palavras.select { |palavra| palavra =~ "estágio".regex_me_to_search_ruby(:latin_chars_variations => true, :exact_word => true)}
|
102
|
+
#=> ["estagio", "estágio", "éstágio", "estagió"]
|
103
|
+
|
104
|
+
|
105
|
+
Mas saindo um pouco do ruby e pensando numa aplicação em rails.
|
106
|
+
Imagine que você tem um campo de busca e o usuário pode escrever várias palavras no campo (igual ao Google), e o resultado deverá retornar os textos que contenham o que o usuário digitou.
|
107
|
+
|
108
|
+
Imagine que você tá no controller (Embora essa lógica deveria estar no model! :D)
|
109
|
+
|
110
|
+
@textos = Texto.all(:conditions => ["texto REGEXP ?", params[:busca].regex_me_to_search_mysql])
|
111
|
+
|
112
|
+
Por enquanto as regex estão prontas para o mysql e você pode utilizar todos os parâmetros que foram apresentados acima!
|
113
|
+
|
114
|
+
Perceberam os ganhos que tivemos na aplicação???
|
115
|
+
|
116
|
+
O usuário pode escrever no form:
|
117
|
+
|
118
|
+
car* *a c*r*
|
119
|
+
|
120
|
+
Só com isso você já dá ao usuário o poder de fazer pesquisas mais inteligentes, e você só utiliza um método!
|
121
|
+
|
122
|
+
Não sei se devo... Mas muitos programadores fazem buscas dinâmicas utilizando o operador LIKE, e saem essas bizarrices.
|
123
|
+
|
124
|
+
SELECT * FROM TEXTOS
|
125
|
+
WHERE texto LIKE "%CARRO%"
|
126
|
+
OR texto LIKE "%MOTO%"
|
127
|
+
OR texto LIKE "%AVIAO%"
|
128
|
+
|
129
|
+
Já com o <b>string_utility_belt</b> ele vai gerar.
|
130
|
+
|
131
|
+
SELECT * FROM TEXTOS WHERE texto REGEXP "(CARRO|MOTO|AVIAO)"
|
132
|
+
|
133
|
+
Que é muito mais inteligente e poderoso!!!
|
134
|
+
|
135
|
+
É isso!!!
|
136
|
+
|
137
|
+
A api tem outras funcionalidades bem interessantes...
|
138
|
+
Mas para começar vou documentar apenas o módulo RegexMe!
|
139
|
+
|
140
|
+
E caso você queira colaborar...
|
141
|
+
Dê um fork no projeto, envie seus códigos e eu os publicarei na GEM.
|
142
|
+
|
143
|
+
Abraço,
|
144
|
+
Serradura
|
145
|
+
|
data/Rakefile
CHANGED
@@ -1,35 +1,19 @@
|
|
1
|
-
|
2
|
-
# To change this template, choose Tools | Templates
|
3
|
-
# and open the template in the editor.
|
4
|
-
|
5
|
-
|
6
|
-
require 'rubygems'
|
1
|
+
require 'rubygems' if RUBY_VERSION < '1.9'
|
7
2
|
require 'rake'
|
8
3
|
require 'rake/clean'
|
9
|
-
require 'rake/gempackagetask'
|
10
|
-
require 'rake/rdoctask'
|
11
4
|
require 'rake/testtask'
|
12
|
-
require '
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
s.extra_rdoc_files = ['README', 'LICENSE']
|
19
|
-
s.summary = 'Your summary here'
|
20
|
-
s.description = s.summary
|
21
|
-
s.author = ''
|
22
|
-
s.email = ''
|
23
|
-
# s.executables = ['your_executable_here']
|
24
|
-
s.files = %w(LICENSE README Rakefile) + Dir.glob("{bin,lib,spec}/**/*")
|
25
|
-
s.require_path = "lib"
|
26
|
-
s.bindir = "bin"
|
5
|
+
require 'bundler/gem_tasks'
|
6
|
+
|
7
|
+
# Try rake/rdoctask first and fall back to rdoc/task when it cannot be loaded.
# A failed require raises LoadError, which is NOT a StandardError, so a bare
# `rescue` would never reach the fallback; LoadError is therefore named
# explicitly (StandardError is kept so anything the bare rescue used to catch
# is still caught).
begin
  require 'rake/rdoctask'
rescue LoadError, StandardError
  require 'rdoc/task'
end
|
28
12
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
13
|
+
# Try rcov/rcovtask first and fall back to rcov/task when it cannot be loaded.
# A failed require raises LoadError, which is NOT a StandardError, so a bare
# `rescue` would never reach the fallback; LoadError is therefore named
# explicitly (StandardError is kept so anything the bare rescue used to catch
# is still caught).
begin
  require 'rcov/rcovtask'
rescue LoadError, StandardError
  require 'rcov/task'
end
|
34
18
|
|
35
19
|
Rake::RDocTask.new do |rdoc|
|
@@ -41,11 +25,35 @@ Rake::RDocTask.new do |rdoc|
|
|
41
25
|
rdoc.options << '--line-numbers'
|
42
26
|
end
|
43
27
|
|
44
|
-
|
45
|
-
|
28
|
+
# Defines the test:all task, which runs every Ruby file found under test/.
namespace :test do
  Rake::TestTask.new do |test_task|
    test_task.name = 'all'
    test_task.test_files = FileList['test/**/*.rb']
  end
end
|
47
34
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
35
|
+
# Runs the given test files under rcov with an aggregated text report.
#
# files - list of test file paths; when empty a notice is printed and nothing
#         else is run.
#
# Previous coverage output is deleted first. Shells out to the `rcov` command.
def run_coverage(files)
  # rm_rf instead of rm_f: rm_f silently leaves a directory in place, so a
  # previous "coverage" report directory would never be cleaned up.
  rm_rf "coverage"
  rm_f "coverage.data"

  if files.empty?
    puts "No files were specified for testing"
    return
  end

  # rcov takes the files to run as one space-separated argument list
  file_list = files.join(" ")
  exclude = '--exclude "usr/*"'
  rcov = "rcov -Ilib:test --sort coverage --text-report #{exclude} --aggregate coverage.data"

  sh "#{rcov} #{file_list}"
end
|
53
|
+
|
54
|
+
# Defines the test:rcov task: measures coverage over every Ruby file below
# test/string_utility_belt/ by handing them to run_coverage.
namespace :test do
  desc 'Measures test coverage'
  task :rcov do
    coverage_targets = Dir["test/string_utility_belt/**/*.rb"]
    run_coverage(coverage_targets)
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'htmlentities'
|
2
|
+
|
3
|
+
module StringUtilityBelt
  # HTML entity helpers for String, all backed by one shared HTMLEntities coder.
  module Entities
    CODER = HTMLEntities.new

    # Returns the receiver run through CODER.encode.
    def generate_entities
      CODER.encode(self)
    end

    # Returns the receiver run through CODER.decode.
    def decode_entities
      CODER.decode(self)
    end

    # Decodes entities and then calls tag_cleaner on the decoded string.
    # tag_cleaner is not defined in this module; it has to be available on
    # String from elsewhere by the time this method is called.
    def decode_entities_and_cleaner
      decoded = decode_entities
      decoded.tag_cleaner
    end
  end
end

# Make the entity helpers available on every String.
String.send(:include, StringUtilityBelt::Entities)
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'string_utility_belt/regex_me_to_search'
|
2
|
+
|
3
|
+
module StringUtilityBelt
  # Word-oriented helpers mixed into String: tokenising, whitespace squeezing
  # and "matches all of these words" checks.
  module General
    # Internal matcher behind String#have_this_words? and
    # String#not_have_this_words?.
    class GENERAL
      CASE_INSENSITIVE_OPT = {:case_insensitive => true}

      # Returns true when +string+ matches the search pattern of every word in
      # +words_to_match+ (vacuously true for an empty list), false otherwise.
      #
      # +options+ may be true/false (used as the :exact_word flag) or a Hash
      # that is forwarded to regex_me_to_search_ruby. In every case
      # :case_insensitive => true is forced on the forwarded options.
      def have_this_words?(string, words_to_match, options)
        @string = string
        @arguments = options

        words_to_match.each do |word|
          return false if string_does_not_match_with_this_word_pattern?(word)
        end

        true
      end

      private

      def string_does_not_match_with_this_word_pattern?(word)
        @string !~ word.regex_me_to_search_ruby(arguments)
      end

      # Normalises the caller-supplied options into the Hash handed to
      # regex_me_to_search_ruby.
      def arguments
        if is_boolean?
          CASE_INSENSITIVE_OPT.merge(:exact_word => @arguments)
        elsif is_hash?
          @arguments.merge(CASE_INSENSITIVE_OPT)
        else
          # Anything else (e.g. nil) used to fall through and return nil, which
          # blew up inside regex_me_to_search_ruby; fall back to the defaults.
          CASE_INSENSITIVE_OPT.dup
        end
      end

      def is_boolean?
        @arguments.instance_of?(FalseClass) || @arguments.instance_of?(TrueClass)
      end

      # is_a? rather than instance_of? so Hash subclasses are accepted as well.
      def is_hash?
        @arguments.is_a?(Hash)
      end
    end

    WORD_PATTERN = /\w[\w\'\-]*/
    ANY_SPACE_PATTERN = /\s+/

    SIMPLE_SPACE = " "

    # Returns an Array with every WORD_PATTERN match found in the receiver.
    def words
      scan(WORD_PATTERN)
    end

    # Returns a copy in which every run of whitespace is a single space.
    def simple_space
      gsub(ANY_SPACE_PATTERN, SIMPLE_SPACE)
    end

    # In-place variant of simple_space. Like gsub!, it returns nil when the
    # receiver contained no whitespace to replace.
    def simple_space!
      gsub!(ANY_SPACE_PATTERN, SIMPLE_SPACE)
    end

    # True when the receiver matches every word in +words_to_match+.
    # +options+ is true/false (exact-word matching) or an options Hash.
    def have_this_words?(words_to_match, options = false)
      GENERAL.new.have_this_words?(self, words_to_match, options)
    end

    # Negation of have_this_words?: true as soon as one word does not match.
    def not_have_this_words?(words_to_match, options = false)
      !GENERAL.new.have_this_words?(self, words_to_match, options)
    end
  end
end

class String
  include StringUtilityBelt::General
end
|
@@ -1,16 +1,33 @@
|
|
1
|
+
require 'string_utility_belt/regex_me_to_search'
|
1
2
|
|
2
|
-
module
|
3
|
+
module StringUtilityBelt
|
4
|
+
module MatchRank
|
3
5
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
# Tallies overall match counts for +words_to_match+ using integer counters.
#
# Delegates the iteration to frequency_by (only partly visible in this view);
# for each (search word, word) pair it yields, :exact is bumped on an
# exact-word match and :matched on a plain match, both case-insensitive.
# Returns whatever frequency_by returns.
def total_frequency_by(words_to_match)
  frequency_by(words_to_match, 0, 0) do |freq, word_to_match, word|
    exact_pattern = word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
    if word =~ exact_pattern
      freq[:exact] += 1
    end

    loose_pattern = word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
    if word =~ loose_pattern
      freq[:matched] += 1
    end
  end
end
|
9
12
|
|
10
|
-
|
11
|
-
|
13
|
+
# Tallies match counts per search word, using zero-defaulting hashes keyed by
# the search word.
#
# Delegates the iteration to frequency_by (only partly visible in this view);
# for each (search word, word) pair it yields, the search word's :exact entry
# is bumped on an exact-word match and its :matched entry on a plain match,
# both case-insensitive. Returns whatever frequency_by returns.
def words_frequency_by(words_to_match)
  frequency_by(words_to_match, Hash.new(0), Hash.new(0)) do |freq, word_to_match, word|
    exact_pattern = word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
    if word =~ exact_pattern
      freq[:exact][word_to_match] += 1
    end

    loose_pattern = word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
    if word =~ loose_pattern
      freq[:matched][word_to_match] += 1
    end
  end
end
|
19
|
+
|
20
|
+
# Scores how precisely the receiver matches +words_to_match+.
#
# Returns a Hash of Floats:
#   :exact     - exact-word match count from total_frequency_by
#   :matched   - plain match count from total_frequency_by
#   :precision - exact / matched as a percentage; 0.0 when nothing matched
def match_and_score_by(words_to_match)
  freq = total_frequency_by(words_to_match)
  exact = freq[:exact].to_f
  matched = freq[:matched].to_f

  # 0.0 / 0.0 is NaN for Floats, so report zero precision when there were no
  # matches at all instead of leaking NaN to callers.
  precision = matched.zero? ? 0.0 : (exact / matched) * 100

  {:exact => exact, :matched => matched, :precision => precision}
end
|
28
|
+
|
29
|
+
private
|
12
30
|
|
13
|
-
private
|
14
31
|
def frequency_by words_to_match, frequency_object_a, frequency_object_b
|
15
32
|
self_words = self.words
|
16
33
|
freq = {:exact => frequency_object_a, :matched => frequency_object_b}
|
@@ -24,20 +41,9 @@ module MatchRank
|
|
24
41
|
return freq
|
25
42
|
end
|
26
43
|
|
27
|
-
|
28
|
-
def words_frequency_by words_to_match
|
29
|
-
frequency_by(words_to_match, Hash.new(0), Hash.new(0)) do |freq, word_to_match, word|
|
30
|
-
freq[:exact][word_to_match] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
|
31
|
-
freq[:matched][word_to_match] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def total_frequency_by words_to_match
|
36
|
-
frequency_by(words_to_match, 0, 0) do |freq, word_to_match, word|
|
37
|
-
freq[:exact] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
|
38
|
-
freq[:matched] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
44
|
+
end
|
42
45
|
end
|
43
46
|
|
47
|
+
# Make the match-ranking helpers available on every String.
String.send(:include, StringUtilityBelt::MatchRank)
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module RegexMe
  # Low-level regex-fragment builders mixed into String.
  #
  # Both public methods MUTATE the receiver in place and return it, so call
  # them on a copy when the original text must be kept (frozen strings raise).
  module Helper
    A_VARIATIONS = "(a|à|á|â|ã|ä)"
    E_VARIATIONS = "(e|è|é|ê|ë)"
    I_VARIATIONS = "(i|ì|í|î|ï)"
    O_VARIATIONS = "(o|ò|ó|ô|õ|ö)"
    U_VARIATIONS = "(u|ù|ú|û|ü)"
    C_VARIATIONS = "(c|ç)"
    N_VARIATIONS = "(n|ñ)"

    LATIN_CHARS_VARIATIONS = [A_VARIATIONS,
                              E_VARIATIONS,
                              I_VARIATIONS,
                              O_VARIATIONS,
                              U_VARIATIONS,
                              C_VARIATIONS,
                              N_VARIATIONS]

    # Word-boundary tokens for each supported regex dialect.
    BORDER_TO = {
      :ruby  => {:left => '\b',      :right => '\b'      },
      :mysql => {:left => '[[:<:]]', :right => '[[:>:]]' }
    }

    # Replaces, in place, every character that matches one of the
    # LATIN_CHARS_VARIATIONS groups (case-insensitively) with that whole group,
    # e.g. "cão" becomes "(c|ç)(a|à|á|â|ã|ä)(o|ò|ó|ô|õ|ö)". All other
    # characters are kept as they are. Returns the receiver.
    def regex_latin_ci_list
      # Compile each group's matcher once per call rather than once per
      # character and group; the patterns do not depend on the character.
      matchers = LATIN_CHARS_VARIATIONS.map do |variations|
        [variations, Regexp.new(variations, Regexp::IGNORECASE)]
      end

      expanded = ""

      each_char do |char|
        group, = matchers.find { |_, pattern| char =~ pattern }
        expanded << (group || char)
      end

      replace(expanded)
    end

    # Applies the requested transformations to the receiver in place, in this
    # fixed order, and returns the receiver:
    #
    #   :any                    - turn every "*" into ".*"
    #   :latin_chars_variations - expand accentable letters (regex_latin_ci_list)
    #   :border                 - Hash with :to (:ruby / :mysql) and :direction
    #                             (:left / :right / :both) adding word borders
    #   :or                     - append a trailing "|"
    def regex_builder(options)
      replace_the_any_char_per_any_pattern if options[:any]
      replace_chars_includeds_in_latin_variation_list if options[:latin_chars_variations]
      insert_border(options[:border]) if options[:border]
      insert_OR if options[:or]

      self
    end

    private

    def replace_the_any_char_per_any_pattern
      gsub!(/\*/, '.*')
    end

    def replace_chars_includeds_in_latin_variation_list
      regex_latin_ci_list
    end

    # Adds the dialect's border token on the requested side(s); any other
    # direction (including nil) leaves the receiver untouched.
    def insert_border(options)
      border = BORDER_TO[options[:to]]

      case options[:direction]
      when :left
        insert(0, border[:left])
      when :right
        insert(-1, border[:right])
      when :both
        insert(0, border[:left]).insert(-1, border[:right])
      else
        self
      end
    end

    def insert_OR
      insert(-1, "|")
    end
  end
end

class String
  include RegexMe::Helper
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'string_utility_belt/regex_me_helper'
|
2
|
+
|
3
|
+
module StringUtilityBelt
  module RegexMe
    # What a blank search string turns into, per target dialect.
    EMPTYs = {:ruby => //, :mysql => ''}
    WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES = '[^0-9a-zA-Z\_]+'

    module To
      # Turns a free-text search string into a search pattern: a Regexp for
      # Ruby or a pattern String for MySQL REGEXP.
      #
      # Recognised options:
      #   :case_insensitive       - (ruby only) adds Regexp::IGNORECASE
      #   :multiline              - (ruby only) adds Regexp::MULTILINE
      #   :exact_phrase           - match the words in sequence, separated by
      #                             WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES
      #   :exact_word             - wrap every word in word borders
      #   :latin_chars_variations - forwarded to regex_builder for every word
      #
      # NOTE(review): words are inserted into the pattern without escaping, so
      # regex metacharacters typed by a user reach the engine as-is.
      module Search
        # Returns a Regexp built from the receiver (// for a blank receiver).
        def regex_me_to_search_ruby(options = {})
          regex_me_to_search(:ruby, options)
        end

        # Returns a pattern String for MySQL built from the receiver ('' for a
        # blank receiver).
        def regex_me_to_search_mysql(options = {})
          regex_me_to_search(:mysql, options)
        end

        private

        # Returns a copy of +options+ with :case_insensitive / :multiline turned
        # into Regexp flag values (or nil) and :or normalised to true unless it
        # was explicitly false.
        def options_handler(options)
          handled = {
            :case_insensitive => (options[:case_insensitive] ? Regexp::IGNORECASE : nil),
            :multiline        => (options[:multiline] ? Regexp::MULTILINE : nil),
            :or               => (options[:or] == false ? false : true)
          }

          options.merge(handled)
        end

        def regex_me_to_search(env, options)
          return EMPTYs[env] if strip.empty?

          execute_builder(env, options)
        end

        def execute_builder(env, options)
          opt_handled = options_handler(options)
          builder_result = builder(env, opt_handled)

          case env
          when :ruby
            # Regexp.new takes ONE options argument, so the flags must be OR-ed
            # together. Passing them as separate arguments put :multiline in a
            # different positional parameter, where it was not applied.
            flags = (opt_handled[:case_insensitive] || 0) | (opt_handled[:multiline] || 0)
            Regexp.new(builder_result, flags)
          when :mysql
            builder_result
          end
        end

        # Builds the pattern source as a String. Uses only locals: storing the
        # result in an instance variable on the receiver raised on frozen
        # strings and left state behind on the searched-for text.
        def builder(border_to, options)
          lcv = options[:latin_chars_variations]

          if options[:exact_phrase]
            # Latin variations are always applied to exact phrases, whatever
            # :latin_chars_variations says.
            phrase = strip.simple_space.regex_latin_ci_list
            phrase = phrase.gsub(/\s/, WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES)

            return phrase.regex_builder(:or => false,
                                        :border => {:to => border_to,
                                                    :direction => :both})
          end

          alternatives = strip.split.map do |word|
            word_options = {:latin_chars_variations => lcv, :or => true}

            if options[:exact_word]
              word_options[:border] = {:to => border_to, :direction => :both}
            elsif have_the_any_char?(word)
              word_options[:any] = true
              word_options[:border] = border(border_to, word)
            end

            word.regex_builder(word_options)
          end

          # Every alternative ends with "|"; drop only the very last one. (The
          # previous sub! on "|)" removed the FIRST such pair, which could sit
          # inside a user-supplied word.)
          "(" + alternatives.join.sub(/\|\z/, '') + ")"
        end

        def have_the_any_char?(string)
          string.include?('*')
        end

        # Chooses which side of a wildcard word gets a word border: the side
        # opposite a leading/trailing "*", or both sides for an inner "*".
        def border(to, word)
          direction = nil

          case word
          when /^\*/
            direction = :right
          when /\*$/
            direction = :left
          when /^.*\*.*$/
            direction = :both
          end

          {:to => to, :direction => direction}
        end
      end
    end
  end
end

class String
  include StringUtilityBelt::RegexMe::To::Search
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module StringUtilityBelt
  # Markup-stripping helpers for String.
  module Tags
    EMPTY_STR = ''

    # Anything that looks like a tag: "<", no further "<", then ">".
    TAG_PATTERN = /<[^<]*?>/

    # Tags available as of 09/2010 - source: http://www.w3schools.com/tags/default.asp
    ANY_HTML_TAG_PATTERN = /<\/?(a|p|abbr|acronym|address|applet|area|b|base|basefont|bdo|big|blockquote|body|br|button|caption|center|cite|code|col|colgroup|dd|del|dfn|dir|div|dl|dt|em|fieldset|font|form|frame|frameset|h6|head|hr|html|i|iframe|img|input|ins|isindex|kbd|label|legend|li|link|map|menu|meta|noframes|noscript|object)[^>]+??>/im

    # Returns a copy of the receiver with every TAG_PATTERN match removed.
    def tag_cleaner
      gsub(TAG_PATTERN, EMPTY_STR)
    end

    # Returns a copy of the receiver with every ANY_HTML_TAG_PATTERN match
    # removed; only the tag names listed in that pattern are considered.
    def html_tag_cleaner
      gsub(ANY_HTML_TAG_PATTERN, EMPTY_STR)
    end
  end
end

# Make the tag helpers available on every String.
String.send(:include, StringUtilityBelt::Tags)
|