string_utility_belt 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  require 'string_utility_belt/version'
2
2
 
3
- require 'string_utility_belt/regex_me_to_search'
3
+ require 'string_utility_belt/regex_me'
4
4
  require 'string_utility_belt/general'
5
5
  require 'string_utility_belt/match_rank'
6
6
  require 'string_utility_belt/tags'
@@ -1,41 +1,48 @@
1
- require 'string_utility_belt/regex_me_to_search'
1
+ require 'string_utility_belt/regex_me'
2
2
 
3
3
  module StringUtilityBelt
4
4
  module General
5
5
  class GENERAL
6
6
  CASE_INSENSITIVE_OPT = {:case_insensitive => true}
7
7
 
8
- def have_this_words?(string, words_to_match, options)
9
- @string = string
8
+ def initialize(string)
9
+ @string = string
10
+ end
11
+
12
+ def have_this_words?(words_to_match, options)
10
13
  @arguments = options
11
14
 
12
15
  for word in words_to_match
13
- return false if string_does_not_match_with_this_word_pattern?(word)
16
+ return false if not word_is_found_in_the_string?(word)
14
17
  end
15
18
 
16
19
  return true
17
20
  end
18
21
 
19
22
  private
20
- def string_does_not_match_with_this_word_pattern?(word)
21
- @string !~ word.regex_me_to_search_ruby(arguments)
23
+ def word_is_found_in_the_string?(word)
24
+ @string =~ word.regex_me_to_search_ruby(arguments)
22
25
  end
23
26
 
24
27
  def arguments
25
- if is_boolean?
28
+ if args_is_nil? or args_is_boolean?
26
29
  CASE_INSENSITIVE_OPT.merge({:exact_word => @arguments})
27
- elsif is_hash?
30
+ elsif args_is_hash?
28
31
  @arguments.merge(CASE_INSENSITIVE_OPT)
29
32
  end
30
33
  end
31
34
 
32
- def is_boolean?
35
+ def args_is_boolean?
33
36
  @arguments.instance_of?(FalseClass) || @arguments.instance_of?(TrueClass)
34
37
  end
35
38
 
36
- def is_hash?
39
+ def args_is_hash?
37
40
  @arguments.instance_of?(Hash)
38
41
  end
42
+
43
+ def args_is_nil?
44
+ @arguments.nil?
45
+ end
39
46
  end
40
47
 
41
48
  WORD_PATTERN = /\w[\w\'\-]*/
@@ -55,14 +62,14 @@ module StringUtilityBelt
55
62
  self.gsub!(ANY_SPACE_PATTERN, SIMPLE_SPACE)
56
63
  end
57
64
 
58
- def have_this_words?(words_to_match, options = false)
59
- i = GENERAL.new
60
- i.have_this_words?(self, words_to_match, options)
65
+ def have_this_words?(words_to_match, options = nil)
66
+ i = GENERAL.new(self)
67
+ i.have_this_words?(words_to_match, options)
61
68
  end
62
69
 
63
- def not_have_this_words?(words_to_match, options = false)
64
- i = GENERAL.new
65
- !i.have_this_words?(self, words_to_match, options)
70
+ def not_have_this_words?(words_to_match, options = nil)
71
+ i = GENERAL.new(self)
72
+ !i.have_this_words?(words_to_match, options)
66
73
  end
67
74
  end
68
75
  end
@@ -1,46 +1,103 @@
1
- require 'string_utility_belt/regex_me_to_search'
1
+ require 'string_utility_belt/regex_me'
2
2
 
3
3
  module StringUtilityBelt
4
4
  module MatchRank
5
+ class MATCHRANK
6
+ TOTAL_TEMPLATE = {:exact => 0, :matched => 0}
7
+ SCORE_TEMPLATE = TOTAL_TEMPLATE.merge({:precision => 0})
5
8
 
6
- def total_frequency_by words_to_match
7
- frequency_by(words_to_match, 0, 0) do |freq, word_to_match, word|
8
- freq[:exact] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
9
- freq[:matched] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
9
+ def initialize(string)
10
+ @text = string
10
11
  end
11
- end
12
12
 
13
- def words_frequency_by words_to_match
14
- frequency_by(words_to_match, Hash.new(0), Hash.new(0)) do |freq, word_to_match, word|
15
- freq[:exact][word_to_match] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
16
- freq[:matched][word_to_match] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
13
+ def frequency(options)
14
+ @options = options
15
+ @result = result_template
16
+
17
+ for @search_word in search_words
18
+ for @text_word in @text.words
19
+ count_exact_matches
20
+ count_fragmet_matches
21
+ end
22
+ end
23
+
24
+ calculate_the_precision if @options[:template] == :precision
25
+
26
+ return @result
17
27
  end
18
- end
19
28
 
20
- def match_and_score_by words_to_match
21
- freq = self.total_frequency_by words_to_match
22
- statistic = {:exact => freq[:exact].to_f, :matched => freq[:matched].to_f, :precision => 0.0}
29
+ private
30
+
31
+ def search_words
32
+ words = @options[:search_words]
33
+ words.instance_of?(Array) ? words : words.to_s.words
34
+ end
23
35
 
24
- statistic[:precision] = (statistic[:exact] / statistic[:matched]) * 100
36
+ def count_exact_matches
37
+ incr_result(:exact) if find_the_wanted_word(:with_precision => true)
38
+ end
25
39
 
26
- return statistic
27
- end
40
+ def count_fragmet_matches
41
+ incr_result(:matched) if find_the_wanted_word(:with_precision => false)
42
+ end
28
43
 
29
- private
44
+ def calculate_the_precision
45
+ @result[:precision] = (@result[:exact].to_f / @result[:matched].to_f) * 100
46
+ end
47
+
48
+ def find_the_wanted_word(option)
49
+ @text_word =~ matcher(option[:with_precision])
50
+ end
30
51
 
31
- def frequency_by words_to_match, frequency_object_a, frequency_object_b
32
- self_words = self.words
33
- freq = {:exact => frequency_object_a, :matched => frequency_object_b}
52
+ def matcher(precision)
53
+ @search_word.regex_me_to_search_ruby(:exact_word => precision,
54
+ :case_insensitive => true)
55
+ end
34
56
 
35
- for word_to_match in words_to_match
36
- for word in self_words
37
- yield freq, word_to_match, word
57
+ def incr_result(key)
58
+ case @options[:template]
59
+ when :grouped_words
60
+ begin
61
+ @result[key][@search_word] += 1
62
+ rescue
63
+ @result[key][@search_word] = 0
64
+ @result[key][@search_word] += 1
65
+ end
66
+ else
67
+ @result[key] += 1
38
68
  end
39
69
  end
40
70
 
41
- return freq
71
+ def result_template
72
+ templates.fetch(@options[:template]).clone
73
+ end
74
+
75
+ def templates
76
+ {
77
+ :total => TOTAL_TEMPLATE,
78
+ :precision => SCORE_TEMPLATE,
79
+ :grouped_words => {:exact => {}, :matched => {}}
80
+ }
81
+ end
82
+ end
83
+
84
+ def total_frequency_by(words)
85
+ measure.frequency(:template => :total, :search_words => words)
42
86
  end
43
87
 
88
+ def words_frequency_by(words)
89
+ measure.frequency(:template => :grouped_words, :search_words => words)
90
+ end
91
+
92
+ def match_and_score_by(words)
93
+ measure.frequency(:template => :precision, :search_words => words)
94
+ end
95
+
96
+ private
97
+
98
+ def measure
99
+ MATCHRANK.new(self)
100
+ end
44
101
  end
45
102
  end
46
103
 
@@ -0,0 +1 @@
1
+ require File.dirname(__FILE__) + '/regex_me/regex_me'
@@ -0,0 +1,99 @@
1
+ require File.dirname(__FILE__) + '/configurations'
2
+ require File.dirname(__FILE__) + '/helpers'
3
+
4
+ module RegexMe
5
+ module Builders
6
+ include RegexMe::Configurations::Builder
7
+
8
+ def regex_me_to_search_ruby(options = {})
9
+ regex_me_to_search(:ruby, options)
10
+ end
11
+
12
+ def regex_me_to_search_mysql(options = {})
13
+ regex_me_to_search(:mysql, options)
14
+ end
15
+
16
+ private
17
+
18
+ def options_handler(options)
19
+ handled = \
20
+ {:case_insensitive => (options[:case_insensitive] ? Regexp::IGNORECASE : nil ),
21
+ :multiline => (options[:multiline] ? Regexp::MULTILINE : nil ),
22
+ :or => (options[:or] == false ? false : true)}
23
+
24
+ return options.merge(handled)
25
+ end
26
+
27
+ def regex_me_to_search(env, options)
28
+ return EMPTYs[env] if self.strip.empty?
29
+
30
+ execute_builder(env, options)
31
+ end
32
+
33
+ def execute_builder(env, options)
34
+ opt_handled = options_handler(options)
35
+
36
+ builder_result = builder(env, opt_handled)
37
+
38
+ case env
39
+ when :ruby
40
+ options = [opt_handled[:case_insensitive], opt_handled[:multiline]].compact
41
+ Regexp.new(builder_result, *options)
42
+ when :mysql
43
+ builder_result
44
+ end
45
+ end
46
+
47
+ def builder(border_to, options)
48
+ string = self
49
+
50
+ lcv = options[:latin_chars_variations]
51
+
52
+ if options[:exact_phrase]
53
+ @regexp = \
54
+ string \
55
+ .strip.simple_space \
56
+ .regex_latin_ci_list \
57
+ .gsub(/\s/, WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES) \
58
+ .regex_builder(:or => false,
59
+ :border => {:to => border_to,
60
+ :direction => :both})
61
+ else
62
+ @regexp = '('
63
+
64
+ for word in string.strip.split
65
+ if options[:exact_word]
66
+ @regexp << word.regex_builder(:border => {:to => border_to, :direction => :both}, :latin_chars_variations => lcv, :or => true)
67
+ elsif have_the_any_char?(word)
68
+ @regexp << word.regex_builder(:any => true, :border => border(border_to, word) , :latin_chars_variations => lcv, :or => true)
69
+ else
70
+ @regexp << word.regex_builder(:latin_chars_variations => lcv, :or => true)
71
+ end
72
+ end
73
+
74
+ @regexp = (@regexp << ')').sub!(/\|\)/,')')
75
+ end
76
+
77
+ return @regexp
78
+ end
79
+
80
+ def have_the_any_char?(string)
81
+ string.include?('*')
82
+ end
83
+
84
+ def border(to, word)
85
+ direction = nil
86
+
87
+ case word
88
+ when/^\*/
89
+ direction = :right
90
+ when /\*$/
91
+ direction = :left
92
+ when /^.*\*.*$/
93
+ direction = :both
94
+ end
95
+
96
+ {:to => to, :direction => direction}
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,10 @@
1
+ module RegexMe
2
+ module Configurations
3
+ module Builder
4
+ EMPTYs = {:ruby => //, :mysql => ''}
5
+ WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES = '[^0-9a-zA-Z\_]+'
6
+ end
7
+ end
8
+ end
9
+
10
+
@@ -0,0 +1,102 @@
1
+ # coding: utf-8
2
+
3
+ module StringUtilityBelt
4
+ module RegexMe
5
+ module Helper
6
+ A_VARIATIONS = "(a|à|á|â|ã|ä)"
7
+ E_VARIATIONS = "(e|è|é|ê|ë)"
8
+ I_VARIATIONS = "(i|ì|í|î|ï)"
9
+ O_VARIATIONS = "(o|ò|ó|ô|õ|ö)"
10
+ U_VARIATIONS = "(u|ù|ú|û|ü)"
11
+ C_VARIATIONS = "(c|ç)"
12
+ N_VARIATIONS = "(n|ñ)"
13
+
14
+ LATIN_CHARS_VARIATIONS = [A_VARIATIONS,
15
+ E_VARIATIONS,
16
+ I_VARIATIONS,
17
+ O_VARIATIONS,
18
+ U_VARIATIONS,
19
+ C_VARIATIONS,
20
+ N_VARIATIONS]
21
+
22
+ BORDER_TO = {
23
+ :ruby => {:left => '\b', :right => '\b' },
24
+ :mysql => {:left => '[[:<:]]', :right => '[[:>:]]' }
25
+ }
26
+
27
+ def regex_latin_ci_list
28
+ memo = ""
29
+
30
+ self.each_char do |char|
31
+ changed = false
32
+
33
+ for variations in LATIN_CHARS_VARIATIONS
34
+ variations_pattern = Regexp.new(variations, Regexp::IGNORECASE)
35
+
36
+ if char =~ variations_pattern
37
+ changed = true
38
+ memo.insert(-1, variations)
39
+ break
40
+ end
41
+ end
42
+
43
+ memo.insert(-1, char) unless changed
44
+ end
45
+
46
+ self.replace(memo)
47
+ end
48
+
49
+ def regex_builder(options)
50
+ if options[:any]
51
+ replace_the_any_char_per_any_pattern
52
+ end
53
+
54
+ if options[:latin_chars_variations]
55
+ replace_chars_includeds_in_latin_variation_list
56
+ end
57
+
58
+ if options[:border]
59
+ insert_border(options[:border])
60
+ end
61
+
62
+ if options[:or]
63
+ insert_OR
64
+ end
65
+
66
+ return self
67
+ end
68
+
69
+ private
70
+ def replace_the_any_char_per_any_pattern
71
+ self.gsub!(/\*/, '.*')
72
+ end
73
+
74
+ def replace_chars_includeds_in_latin_variation_list
75
+ self.regex_latin_ci_list
76
+ end
77
+
78
+ def insert_border(options)
79
+ border = BORDER_TO[options[:to]]
80
+
81
+ case options[:direction]
82
+ when :left
83
+ self.insert(0, border[:left])
84
+ when :right
85
+ self.insert(-1, border[:right])
86
+ when :both
87
+ self.insert(0, border[:left]).insert(-1, border[:right])
88
+ else
89
+ self
90
+ end
91
+ end
92
+
93
+ def insert_OR
94
+ self.insert(-1, "|")
95
+ end
96
+ end
97
+ end
98
+ end
99
+
100
+ class String
101
+ include StringUtilityBelt::RegexMe::Helper
102
+ end
@@ -0,0 +1,5 @@
1
+ require File.dirname(__FILE__) + '/lib/builders.rb'
2
+
3
+ class String
4
+ include RegexMe::Builders
5
+ end
@@ -1,3 +1,3 @@
1
1
  module StringUtilityBelt
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.1"
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_utility_belt
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 0
10
- version: 0.3.0
9
+ - 1
10
+ version: 0.3.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Rodrigo Serradura
@@ -15,10 +15,25 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-07-07 00:00:00 Z
19
- dependencies: []
20
-
21
- description: Adiciona novas funcionalidades para strings
18
+ date: 2011-07-12 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: htmlentities
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - "="
27
+ - !ruby/object:Gem::Version
28
+ hash: 51
29
+ segments:
30
+ - 4
31
+ - 3
32
+ - 0
33
+ version: 4.3.0
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ description: Adds new features for String objects.
22
37
  email:
23
38
  - rserradura@gmail.com
24
39
  executables: []
@@ -37,8 +52,11 @@ files:
37
52
  - lib/string_utility_belt/entities.rb
38
53
  - lib/string_utility_belt/general.rb
39
54
  - lib/string_utility_belt/match_rank.rb
40
- - lib/string_utility_belt/regex_me_helper.rb
41
- - lib/string_utility_belt/regex_me_to_search.rb
55
+ - lib/string_utility_belt/regex_me.rb
56
+ - lib/string_utility_belt/regex_me/lib/builders.rb
57
+ - lib/string_utility_belt/regex_me/lib/configurations.rb
58
+ - lib/string_utility_belt/regex_me/lib/helpers.rb
59
+ - lib/string_utility_belt/regex_me/regex_me.rb
42
60
  - lib/string_utility_belt/tags.rb
43
61
  - lib/string_utility_belt/version.rb
44
62
  - test/string_utility_belt/entities_test.rb
@@ -80,6 +98,6 @@ rubyforge_project: string_utility_belt
80
98
  rubygems_version: 1.8.2
81
99
  signing_key:
82
100
  specification_version: 3
83
- summary: Metodos uteis para strings
101
+ summary: Useful methods to handle strings
84
102
  test_files: []
85
103
 
@@ -1,100 +0,0 @@
1
- # coding: utf-8
2
-
3
- module RegexMe
4
- module Helper
5
- A_VARIATIONS = "(a|à|á|â|ã|ä)"
6
- E_VARIATIONS = "(e|è|é|ê|ë)"
7
- I_VARIATIONS = "(i|ì|í|î|ï)"
8
- O_VARIATIONS = "(o|ò|ó|ô|õ|ö)"
9
- U_VARIATIONS = "(u|ù|ú|û|ü)"
10
- C_VARIATIONS = "(c|ç)"
11
- N_VARIATIONS = "(n|ñ)"
12
-
13
- LATIN_CHARS_VARIATIONS = [A_VARIATIONS,
14
- E_VARIATIONS,
15
- I_VARIATIONS,
16
- O_VARIATIONS,
17
- U_VARIATIONS,
18
- C_VARIATIONS,
19
- N_VARIATIONS]
20
-
21
- BORDER_TO = {
22
- :ruby => {:left => '\b', :right => '\b' },
23
- :mysql => {:left => '[[:<:]]', :right => '[[:>:]]' }
24
- }
25
-
26
- def regex_latin_ci_list
27
- memo = ""
28
-
29
- self.each_char do |char|
30
- changed = false
31
-
32
- for variations in LATIN_CHARS_VARIATIONS
33
- variations_pattern = Regexp.new(variations, Regexp::IGNORECASE)
34
-
35
- if char =~ variations_pattern
36
- changed = true
37
- memo.insert(-1, variations)
38
- break
39
- end
40
- end
41
-
42
- memo.insert(-1, char) unless changed
43
- end
44
-
45
- self.replace(memo)
46
- end
47
-
48
- def regex_builder(options)
49
- if options[:any]
50
- replace_the_any_char_per_any_pattern
51
- end
52
-
53
- if options[:latin_chars_variations]
54
- replace_chars_includeds_in_latin_variation_list
55
- end
56
-
57
- if options[:border]
58
- insert_border(options[:border])
59
- end
60
-
61
- if options[:or]
62
- insert_OR
63
- end
64
-
65
- return self
66
- end
67
-
68
- private
69
- def replace_the_any_char_per_any_pattern
70
- self.gsub!(/\*/, '.*')
71
- end
72
-
73
- def replace_chars_includeds_in_latin_variation_list
74
- self.regex_latin_ci_list
75
- end
76
-
77
- def insert_border(options)
78
- border = BORDER_TO[options[:to]]
79
-
80
- case options[:direction]
81
- when :left
82
- self.insert(0, border[:left])
83
- when :right
84
- self.insert(-1, border[:right])
85
- when :both
86
- self.insert(0, border[:left]).insert(-1, border[:right])
87
- else
88
- self
89
- end
90
- end
91
-
92
- def insert_OR
93
- self.insert(-1, "|")
94
- end
95
- end
96
- end
97
-
98
- class String
99
- include RegexMe::Helper
100
- end
@@ -1,107 +0,0 @@
1
- require 'string_utility_belt/regex_me_helper'
2
-
3
- module StringUtilityBelt
4
- module RegexMe
5
- EMPTYs = {:ruby => //, :mysql => ''}
6
- WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES = '[^0-9a-zA-Z\_]+'
7
-
8
- module To
9
- module Search
10
- def regex_me_to_search_ruby(options = {})
11
- regex_me_to_search(:ruby, options)
12
- end
13
-
14
- def regex_me_to_search_mysql(options = {})
15
- regex_me_to_search(:mysql, options)
16
- end
17
-
18
- private
19
-
20
- def options_handler(options)
21
- handled = \
22
- {:case_insensitive => (options[:case_insensitive] ? Regexp::IGNORECASE : nil ),
23
- :multiline => (options[:multiline] ? Regexp::MULTILINE : nil ),
24
- :or => (options[:or] == false ? false : true)}
25
-
26
- return options.merge(handled)
27
- end
28
-
29
- def regex_me_to_search(env, options)
30
- return EMPTYs[env] if self.strip.empty?
31
-
32
- execute_builder(env, options)
33
- end
34
-
35
- def execute_builder(env, options)
36
- opt_handled = options_handler(options)
37
-
38
- builder_result = builder(env, opt_handled)
39
-
40
- case env
41
- when :ruby
42
- options = [opt_handled[:case_insensitive], opt_handled[:multiline]].compact
43
- Regexp.new(builder_result, *options)
44
- when :mysql
45
- builder_result
46
- end
47
- end
48
-
49
- def builder(border_to, options)
50
- string = self
51
-
52
- lcv = options[:latin_chars_variations]
53
-
54
- if options[:exact_phrase]
55
- @regexp = \
56
- string \
57
- .strip.simple_space \
58
- .regex_latin_ci_list \
59
- .gsub(/\s/, WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES) \
60
- .regex_builder(:or => false,
61
- :border => {:to => border_to,
62
- :direction => :both})
63
- else
64
- @regexp = '('
65
-
66
- for word in string.strip.split
67
- if options[:exact_word]
68
- @regexp << word.regex_builder(:border => {:to => border_to, :direction => :both}, :latin_chars_variations => lcv, :or => true)
69
- elsif have_the_any_char?(word)
70
- @regexp << word.regex_builder(:any => true, :border => border(border_to, word) , :latin_chars_variations => lcv, :or => true)
71
- else
72
- @regexp << word.regex_builder(:latin_chars_variations => lcv, :or => true)
73
- end
74
- end
75
-
76
- @regexp = (@regexp << ')').sub!(/\|\)/,')')
77
- end
78
-
79
- return @regexp
80
- end
81
-
82
- def have_the_any_char?(string)
83
- string.include?('*')
84
- end
85
-
86
- def border(to, word)
87
- direction = nil
88
-
89
- case word
90
- when/^\*/
91
- direction = :right
92
- when /\*$/
93
- direction = :left
94
- when /^.*\*.*$/
95
- direction = :both
96
- end
97
-
98
- {:to => to, :direction => direction}
99
- end
100
- end
101
- end
102
- end
103
- end
104
-
105
- class String
106
- include StringUtilityBelt::RegexMe::To::Search
107
- end