string_utility_belt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +51 -0
- data/lib/general/general.rb +28 -0
- data/lib/html_and_aml/helpers/entities.rb +16 -0
- data/lib/html_and_aml/helpers/tags.rb +13 -0
- data/lib/html_and_aml/html_and_aml.rb +10 -0
- data/lib/match_rank/match_rank.rb +43 -0
- data/lib/regex_me/helpers/string/regex_me.rb +41 -0
- data/lib/regex_me/regex_me.rb +81 -0
- data/lib/string_utility_belt/version.rb +8 -0
- data/lib/string_utility_belt.rb +18 -0
- data/string_utility_belt.gemspec +10 -0
- metadata +77 -0
data/Rakefile
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#
|
2
|
+
# To change this template, choose Tools | Templates
|
3
|
+
# and open the template in the editor.
|
4
|
+
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
require 'rake'
|
8
|
+
require 'rake/clean'
|
9
|
+
require 'rake/gempackagetask'
|
10
|
+
require 'rake/rdoctask'
|
11
|
+
require 'rake/testtask'
|
12
|
+
require 'spec/rake/spectask'
|
13
|
+
|
14
|
+
# Gem specification used by the packaging task below.
# NOTE(review): name, version and summary here are template placeholders and
# differ from string_utility_belt.gemspec — confirm which one is authoritative.
spec = Gem::Specification.new do |s|
  s.name = 'serradura-string_utility_belt'
  s.version = '0.0.1'
  s.has_rdoc = true
  s.extra_rdoc_files = ['README', 'LICENSE']
  s.summary = 'Your summary here'
  s.description = s.summary
  s.author = ''
  s.email = ''
  # s.executables = ['your_executable_here']
  s.files = %w(LICENSE README Rakefile) + Dir.glob("{bin,lib,spec}/**/*")
  s.require_path = "lib"
  s.bindir = "bin"
end

# Packaging task: builds the gem plus tar and zip archives from +spec+.
Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = true
  p.need_zip = true
end

# RDoc task: documents README, LICENSE and every Ruby file under lib/.
Rake::RDocTask.new do |rdoc|
  files = ['README', 'LICENSE', 'lib/**/*.rb']
  rdoc.rdoc_files.add(files)
  rdoc.main = "README" # page to start on
  rdoc.title = "serradura-string_utility_belt Docs"
  rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
  rdoc.options << '--line-numbers'
end

# Test task: runs every Ruby file under test/.
Rake::TestTask.new do |t|
  t.test_files = FileList['test/**/*.rb']
end

# Spec task: runs every Ruby file under spec/ with lib/ on the load path.
Spec::Rake::SpecTask.new do |t|
  t.spec_files = FileList['spec/**/*.rb']
  # Append the directory name itself; Dir["lib"] would append a nested Array
  # (empty when the directory is missing) instead of a path string.
  t.libs << "lib"
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
|
2
|
+
# General-purpose String helpers: word splitting and word-presence checks.
#
# Written to be mixed into String. The methods call +scan+, +=~+ and +!~+ on
# +self+ and expect every searched word to respond to
# +regex_me_to_search_ruby+.
module General

  # Splits the receiver into words.
  #
  # A word starts with a word character and continues with word characters,
  # apostrophes or hyphens, so "it's" and "well-known" are kept whole.
  #
  # Returns an Array of Strings.
  def words
    scan(/\w[\w\'\-]*/)
  end

  # Tells whether the receiver matches every one of the given words
  # (case-insensitively).
  #
  # words_to_match - an Array of Strings; a single String or nil is accepted
  #                  and treated as a one-element / empty list.
  # exact_word     - when true, each word must match as a whole word.
  #
  # Returns true when all words match (also for an empty list), else false.
  def have_this_words?(words_to_match, exact_word = false)
    helper_have_this_words?(words_to_match, exact_word) do |string, word, exact|
      string =~ word.regex_me_to_search_ruby(:exact_word => exact, :case_insensitive => true)
    end
  end

  # Tells whether the receiver matches none of the given words
  # (case-insensitively).
  #
  # Takes the same arguments as #have_this_words?.
  #
  # Returns true when no word matches (also for an empty list), else false.
  def not_have_this_words?(words_to_match, exact_word = false)
    helper_have_this_words?(words_to_match, exact_word) do |string, word, exact|
      string !~ word.regex_me_to_search_ruby(:exact_word => exact, :case_insensitive => true)
    end
  end

  private

  # Yields (receiver, word, exact_word) for each word and returns true only
  # when the block is truthy for all of them. Stops at the first failure.
  def helper_have_this_words?(words_to_match, exact_word)
    Array(words_to_match).all? { |word| yield self, word, exact_word }
  end

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
|
2
|
+
require "rubygems" if RUBY_VERSION < "1.9"
|
3
|
+
require "htmlentities"
|
4
|
+
|
5
|
+
# HTML-entity helpers meant to be mixed into String.
module Entities

  # Returns the receiver with its HTML entities decoded by HTMLEntities.
  def decode_entities
    HTMLEntities.new.decode(self)
  end

  # Decodes the entities first and then runs +tag_cleaner+ on the result.
  def decode_entities_and_cleaner
    decoded = decode_entities
    decoded.tag_cleaner
  end

end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
|
2
|
+
# Tag-stripping helpers meant to be mixed into String.
module Tags

  # Returns a copy of the receiver with every "<...>" / "</...>" sequence
  # removed, whatever the tag name is.
  def tag_cleaner
    gsub(/<\/?[^>]*>/, "")
  end

  # Returns a copy of the receiver with only the HTML tags named in the
  # pattern removed; tags that are not listed (e.g. <p>) are left alone.
  #
  # Tag list as available up to 09/2010 - source:
  # http://www.w3schools.com/tags/default.asp
  # NOTE(review): the names are not followed by a boundary, so a listed name
  # appears to match as a prefix of longer tag names too — confirm intent.
  def html_tag_cleaner
    # Was `sef.gsub`, which raised NameError on every call.
    gsub(/<\/?(a|abbr|acronym|address|applet|area|b|base|basefont|bdo|big|blockquote|body|br|button|caption|center|cite|code|col|colgroup|dd|del|dfn|dir|div|dl|dt|em|fieldset|font|form|frame|frameset|h6|head|hr|html|i|iframe|img|input|ins|isindex|kbd|label|legend|li|link|map|menu|meta|noframes|noscript|object)[^>]+??>/im, "")
  end

end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
|
2
|
+
# Match counting and scoring helpers meant to be mixed into String.
#
# Relies on the receiver responding to +words+ and on every search word
# responding to +regex_me_to_search_ruby+.
module MatchRank

  # Scores the receiver against a list of search words.
  #
  # Returns a Hash with Float values:
  #   :exact     - number of whole-word matches
  #   :matched   - number of matches of any kind
  #   :precision - exact / matched * 100, or 0.0 when nothing matched
  #                (previously NaN because of 0.0 / 0.0)
  def match_and_score_by(words_to_match)
    freq = total_frequency_by(words_to_match)
    exact = freq[:exact].to_f
    matched = freq[:matched].to_f
    precision = matched.zero? ? 0.0 : (exact / matched) * 100

    {:exact => exact, :matched => matched, :precision => precision}
  end

  # Counts matches per search word.
  #
  # Returns {:exact => {word => count}, :matched => {word => count}}; words
  # without matches are absent from the inner hashes (their default is 0).
  def words_frequency_by(words_to_match)
    frequency_by(words_to_match, Hash.new(0), Hash.new(0)) do |freq, kind, word_to_match|
      freq[kind][word_to_match] += 1
    end
  end

  # Counts matches over all search words together.
  #
  # Returns {:exact => Integer, :matched => Integer}.
  def total_frequency_by(words_to_match)
    frequency_by(words_to_match, 0, 0) do |freq, kind, _word_to_match|
      freq[kind] += 1
    end
  end

  private

  # Compares every search word with every word of the receiver and yields
  # (freq, kind, word_to_match) once per hit, where kind is :exact for a
  # whole-word match and :matched for any match.
  #
  # frequency_object_a / frequency_object_b are the initial :exact and
  # :matched accumulators. Returns the freq Hash after all yields.
  def frequency_by(words_to_match, frequency_object_a, frequency_object_b)
    freq = {:exact => frequency_object_a, :matched => frequency_object_b}
    own_words = words

    Array(words_to_match).each do |word_to_match|
      # Build both patterns once per search word instead of once per pair.
      patterns = [
        [:exact,   word_to_match.regex_me_to_search_ruby(:exact_word => true,  :case_insensitive => true)],
        [:matched, word_to_match.regex_me_to_search_ruby(:exact_word => false, :case_insensitive => true)]
      ]

      own_words.each do |word|
        patterns.each do |kind, pattern|
          yield freq, kind, word_to_match if word =~ pattern
        end
      end
    end

    freq
  end

end
|
43
|
+
|
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
# String extension that turns a search word into a regex fragment.
class String

  # Rewrites the receiver IN PLACE into a regex fragment and returns it.
  #
  # options - Hash:
  #   :any       - when truthy, every "*" becomes ".*"
  #   :border    - {:to => :ruby | :mysql, :direction => :left | :right | :both}
  #                inserts word-boundary markers on the chosen side(s)
  #   :delete_or - when truthy, no trailing "|" is appended
  #
  # Raises ArgumentError when a border has to be inserted for an unknown :to.
  # Returns self.
  def regex_builder(options = {})
    gsub!(/\*/, '.*') if options[:any]

    border = options[:border]
    border_me(border[:to], border[:direction]) if border

    insert_OR_in_right unless options[:delete_or]
    self
  end

  private

  # Appends the alternation separator.
  def insert_OR_in_right
    insert(-1, "|")
  end

  # Inserts the boundary markers for +border_to+ on the requested side(s).
  # Any other direction (including nil) leaves the receiver untouched, and
  # the border target is only looked up when something is inserted.
  def border_me(border_to, direction)
    return self unless [:left, :right, :both].include?(direction)

    border = define_border_metachar(border_to)
    insert(0, border[:left]) unless direction == :right
    insert(-1, border[:right]) unless direction == :left
    self
  end

  # Returns the {:left, :right} boundary markers for a regex dialect.
  # Raises ArgumentError for an unknown dialect; it used to return nil,
  # which surfaced later as a NoMethodError on nil.
  def define_border_metachar(border_to)
    case border_to
    when :ruby
      {:left => '\b', :right => '\b'}
    when :mysql
      {:left => '[[:<:]]', :right => '[[:>:]]'}
    else
      raise ArgumentError, "unknown border target #{border_to.inspect} (expected :ruby or :mysql)"
    end
  end

end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
|
2
|
+
require File.join(File.dirname(__FILE__), "helpers", "string", "regex_me")
|
3
|
+
|
4
|
+
# Builds search patterns from free-text queries.
module RegexMe

  module To

    # Mixin for String: turns the receiver (a list of whitespace-separated
    # search words, optionally using "*" as a wildcard) into a search pattern.
    #
    # Search words are inserted into the pattern as-is, so regex
    # metacharacters typed by the user keep their regex meaning.
    module Search

      # Builds a Ruby Regexp for the receiver.
      #
      # options - Hash:
      #   :exact_word       - match each word only as a whole word
      #   :exact_phrase     - match the whole text as one phrase
      #   :case_insensitive - adds the "i" flag
      #   :m                - adds the "m" flag
      #
      # Returns a Regexp; the empty Regexp when the receiver is blank.
      # Raises RegexpError when the resulting pattern is invalid.
      def regex_me_to_search_ruby(options = {})
        regex_me_to_search(//, :ruby, options)
      end

      # Builds a MySQL REGEXP pattern for the receiver.
      #
      # Takes the same options as #regex_me_to_search_ruby (the flag options
      # have no effect here).
      #
      # Returns a String; "" when the receiver is blank.
      def regex_me_to_search_mysql(options = {})
        regex_me_to_search("", :mysql, options)
      end

      private

      # Normalizes the user options; the flag options become :i / :m or nil.
      def options_handler(options)
        {
          :exact_word       => options[:exact_word],
          :case_insensitive => (options[:case_insensitive] ? :i : nil),
          :m                => (options[:m] ? :m : nil),
          :exact_phrase     => options[:exact_phrase]
        }
      end

      # Returns +regex_empty+ for a blank receiver, else the built pattern.
      def regex_me_to_search(regex_empty, border_to, options)
        return regex_empty if strip.empty?

        execute_builder(self, options_handler(options), border_to)
      end

      # Builds the pattern source and wraps it for the requested dialect.
      def execute_builder(string, opt_handled, border_to)
        result_builder = builder(string, opt_handled[:exact_word], border_to, opt_handled[:exact_phrase])

        case border_to
        when :ruby
          # Regexp.new instead of eval-ing a "/.../flags" literal: the text
          # comes from the caller, and inside an eval'd literal "#{...}" would
          # run as code and "/" would end the literal.
          flags = 0
          flags |= Regexp::IGNORECASE if opt_handled[:case_insensitive]
          flags |= Regexp::MULTILINE if opt_handled[:m]
          Regexp.new(result_builder, flags)
        when :mysql
          # A second gsub on /\\b$/ used to follow; it could never match
          # because this one already replaces every "\b".
          result_builder.gsub(/\\b/, "[[:<:]]")
        end
      end

      # Returns the pattern source: a single bordered phrase when
      # +exact_phrase+ is set, otherwise "(w1|w2|...)" with one alternative
      # per search word.
      def builder(string, exact_word, border_to, exact_phrase)
        if exact_phrase
          return string.gsub(/\s+/, " ").gsub(/\s/, '[^0-9a-zA-Z\_]+').regex_builder(:delete_or => true, :border => {:to => border_to, :direction => :both})
        end

        alternatives = string.strip.split.map do |word|
          # A wildcard decides which side still needs a word boundary.
          direction = case word
                      when /^\*/      then :right
                      when /\*$/      then :left
                      when /^.*\*.*$/ then :both
                      end

          if direction
            word.regex_builder(:any => true, :border => {:to => border_to, :direction => direction})
          elsif exact_word
            word.regex_builder(:border => {:to => border_to, :direction => :both})
          else
            word.regex_builder
          end
        end

        # Every alternative ends with "|"; drop only the final one. The old
        # sub!(/\|\)/, ')') patched the first "|)" anywhere in the pattern
        # and returned nil when there was none.
        "(#{alternatives.join.chomp('|')})"
      end

    end

  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
require "general/general"
|
3
|
+
require "html_and_aml/html_and_aml"
|
4
|
+
require "match_rank/match_rank"
|
5
|
+
require "regex_me/regex_me"
|
6
|
+
|
7
|
+
# Umbrella module that bundles every helper module of the gem.
module StringUtilityBelt

  # Included one at a time, in this order, so that method lookup precedence
  # stays the same (the last module included is searched first).
  [RegexMe::To::Search, MatchRank, General, HtmlAndAML].each do |helper|
    include helper
  end

end
|
15
|
+
|
16
|
+
# Mixes all StringUtilityBelt helpers into the core String class.
String.send(:include, StringUtilityBelt)
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Makes lib/ requirable while the specification is evaluated.
$LOAD_PATH << File.join(File.dirname(__FILE__), 'lib')

# Gem specification for string_utility_belt.
Gem::Specification.new do |s|
  s.name        = "string_utility_belt"
  s.version     = "0.1.0"
  s.description = "Useful methods for strings!"
  s.summary     = "Useful methods for strings!"
  s.author      = "Rodrigo Serradura"
  # "lib/**/*.rb" already recurses into every sub-directory, so the deeper
  # "lib/**/**/*.rb" patterns were redundant.
  s.files       = Dir["{lib/**/*.rb,README.rdoc,Rakefile,*.gemspec}"]

  # lib/html_and_aml/helpers/entities.rb requires "htmlentities"; declare it
  # so that installing this gem also installs it.
  s.add_dependency "htmlentities"
end
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: string_utility_belt
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Rodrigo Serradura
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-09-02 00:00:00 -03:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: Useful methods for strings!
|
23
|
+
email:
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files: []
|
29
|
+
|
30
|
+
files:
|
31
|
+
- lib/html_and_aml/helpers/tags.rb
|
32
|
+
- lib/html_and_aml/helpers/entities.rb
|
33
|
+
- lib/html_and_aml/html_and_aml.rb
|
34
|
+
- lib/string_utility_belt.rb
|
35
|
+
- lib/regex_me/regex_me.rb
|
36
|
+
- lib/regex_me/helpers/string/regex_me.rb
|
37
|
+
- lib/general/general.rb
|
38
|
+
- lib/match_rank/match_rank.rb
|
39
|
+
- lib/string_utility_belt/version.rb
|
40
|
+
- Rakefile
|
41
|
+
- string_utility_belt.gemspec
|
42
|
+
has_rdoc: true
|
43
|
+
homepage:
|
44
|
+
licenses: []
|
45
|
+
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options: []
|
48
|
+
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
hash: 3
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
hash: 3
|
66
|
+
segments:
|
67
|
+
- 0
|
68
|
+
version: "0"
|
69
|
+
requirements: []
|
70
|
+
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 1.3.7
|
73
|
+
signing_key:
|
74
|
+
specification_version: 3
|
75
|
+
summary: Useful methods for strings!
|
76
|
+
test_files: []
|
77
|
+
|