string_utility_belt 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  require 'string_utility_belt/version'
2
2
 
3
- require 'string_utility_belt/regex_me_to_search'
3
+ require 'string_utility_belt/regex_me'
4
4
  require 'string_utility_belt/general'
5
5
  require 'string_utility_belt/match_rank'
6
6
  require 'string_utility_belt/tags'
@@ -1,41 +1,48 @@
1
- require 'string_utility_belt/regex_me_to_search'
1
+ require 'string_utility_belt/regex_me'
2
2
 
3
3
  module StringUtilityBelt
4
4
  module General
5
5
  class GENERAL
6
6
  CASE_INSENSITIVE_OPT = {:case_insensitive => true}
7
7
 
8
- def have_this_words?(string, words_to_match, options)
9
- @string = string
8
+ def initialize(string)
9
+ @string = string
10
+ end
11
+
12
+ def have_this_words?(words_to_match, options)
10
13
  @arguments = options
11
14
 
12
15
  for word in words_to_match
13
- return false if string_does_not_match_with_this_word_pattern?(word)
16
+ return false if not word_is_found_in_the_string?(word)
14
17
  end
15
18
 
16
19
  return true
17
20
  end
18
21
 
19
22
  private
20
- def string_does_not_match_with_this_word_pattern?(word)
21
- @string !~ word.regex_me_to_search_ruby(arguments)
23
+ def word_is_found_in_the_string?(word)
24
+ @string =~ word.regex_me_to_search_ruby(arguments)
22
25
  end
23
26
 
24
27
  def arguments
25
- if is_boolean?
28
+ if args_is_nil? or args_is_boolean?
26
29
  CASE_INSENSITIVE_OPT.merge({:exact_word => @arguments})
27
- elsif is_hash?
30
+ elsif args_is_hash?
28
31
  @arguments.merge(CASE_INSENSITIVE_OPT)
29
32
  end
30
33
  end
31
34
 
32
- def is_boolean?
35
+ def args_is_boolean?
33
36
  @arguments.instance_of?(FalseClass) || @arguments.instance_of?(TrueClass)
34
37
  end
35
38
 
36
- def is_hash?
39
+ def args_is_hash?
37
40
  @arguments.instance_of?(Hash)
38
41
  end
42
+
43
+ def args_is_nil?
44
+ @arguments.nil?
45
+ end
39
46
  end
40
47
 
41
48
  WORD_PATTERN = /\w[\w\'\-]*/
@@ -55,14 +62,14 @@ module StringUtilityBelt
55
62
  self.gsub!(ANY_SPACE_PATTERN, SIMPLE_SPACE)
56
63
  end
57
64
 
58
- def have_this_words?(words_to_match, options = false)
59
- i = GENERAL.new
60
- i.have_this_words?(self, words_to_match, options)
65
+ def have_this_words?(words_to_match, options = nil)
66
+ i = GENERAL.new(self)
67
+ i.have_this_words?(words_to_match, options)
61
68
  end
62
69
 
63
- def not_have_this_words?(words_to_match, options = false)
64
- i = GENERAL.new
65
- !i.have_this_words?(self, words_to_match, options)
70
+ def not_have_this_words?(words_to_match, options = nil)
71
+ i = GENERAL.new(self)
72
+ !i.have_this_words?(words_to_match, options)
66
73
  end
67
74
  end
68
75
  end
@@ -1,46 +1,103 @@
1
- require 'string_utility_belt/regex_me_to_search'
1
+ require 'string_utility_belt/regex_me'
2
2
 
3
3
  module StringUtilityBelt
4
4
  module MatchRank
5
+ class MATCHRANK
6
+ TOTAL_TEMPLATE = {:exact => 0, :matched => 0}
7
+ SCORE_TEMPLATE = TOTAL_TEMPLATE.merge({:precision => 0})
5
8
 
6
- def total_frequency_by words_to_match
7
- frequency_by(words_to_match, 0, 0) do |freq, word_to_match, word|
8
- freq[:exact] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
9
- freq[:matched] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
9
+ def initialize(string)
10
+ @text = string
10
11
  end
11
- end
12
12
 
13
- def words_frequency_by words_to_match
14
- frequency_by(words_to_match, Hash.new(0), Hash.new(0)) do |freq, word_to_match, word|
15
- freq[:exact][word_to_match] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => true , :case_insensitive => true)
16
- freq[:matched][word_to_match] += 1 if word =~ word_to_match.regex_me_to_search_ruby(:exact_word => false , :case_insensitive => true)
13
+ def frequency(options)
14
+ @options = options
15
+ @result = result_template
16
+
17
+ for @search_word in search_words
18
+ for @text_word in @text.words
19
+ count_exact_matches
20
+ count_fragmet_matches
21
+ end
22
+ end
23
+
24
+ calculate_the_precision if @options[:template] == :precision
25
+
26
+ return @result
17
27
  end
18
- end
19
28
 
20
- def match_and_score_by words_to_match
21
- freq = self.total_frequency_by words_to_match
22
- statistic = {:exact => freq[:exact].to_f, :matched => freq[:matched].to_f, :precision => 0.0}
29
+ private
30
+
31
+ def search_words
32
+ words = @options[:search_words]
33
+ words.instance_of?(Array) ? words : words.to_s.words
34
+ end
23
35
 
24
- statistic[:precision] = (statistic[:exact] / statistic[:matched]) * 100
36
+ def count_exact_matches
37
+ incr_result(:exact) if find_the_wanted_word(:with_precision => true)
38
+ end
25
39
 
26
- return statistic
27
- end
40
+ def count_fragmet_matches
41
+ incr_result(:matched) if find_the_wanted_word(:with_precision => false)
42
+ end
28
43
 
29
- private
44
+ def calculate_the_precision
45
+ @result[:precision] = (@result[:exact].to_f / @result[:matched].to_f) * 100
46
+ end
47
+
48
+ def find_the_wanted_word(option)
49
+ @text_word =~ matcher(option[:with_precision])
50
+ end
30
51
 
31
- def frequency_by words_to_match, frequency_object_a, frequency_object_b
32
- self_words = self.words
33
- freq = {:exact => frequency_object_a, :matched => frequency_object_b}
52
+ def matcher(precision)
53
+ @search_word.regex_me_to_search_ruby(:exact_word => precision,
54
+ :case_insensitive => true)
55
+ end
34
56
 
35
- for word_to_match in words_to_match
36
- for word in self_words
37
- yield freq, word_to_match, word
57
+ def incr_result(key)
58
+ case @options[:template]
59
+ when :grouped_words
60
+ begin
61
+ @result[key][@search_word] += 1
62
+ rescue
63
+ @result[key][@search_word] = 0
64
+ @result[key][@search_word] += 1
65
+ end
66
+ else
67
+ @result[key] += 1
38
68
  end
39
69
  end
40
70
 
41
- return freq
71
+ def result_template
72
+ templates.fetch(@options[:template]).clone
73
+ end
74
+
75
+ def templates
76
+ {
77
+ :total => TOTAL_TEMPLATE,
78
+ :precision => SCORE_TEMPLATE,
79
+ :grouped_words => {:exact => {}, :matched => {}}
80
+ }
81
+ end
82
+ end
83
+
84
+ def total_frequency_by(words)
85
+ measure.frequency(:template => :total, :search_words => words)
42
86
  end
43
87
 
88
+ def words_frequency_by(words)
89
+ measure.frequency(:template => :grouped_words, :search_words => words)
90
+ end
91
+
92
+ def match_and_score_by(words)
93
+ measure.frequency(:template => :precision, :search_words => words)
94
+ end
95
+
96
+ private
97
+
98
+ def measure
99
+ MATCHRANK.new(self)
100
+ end
44
101
  end
45
102
  end
46
103
 
@@ -0,0 +1 @@
1
+ require File.dirname(__FILE__) + '/regex_me/regex_me'
@@ -0,0 +1,99 @@
1
+ require File.dirname(__FILE__) + '/configurations'
2
+ require File.dirname(__FILE__) + '/helpers'
3
+
4
+ module RegexMe
5
+ module Builders
6
+ include RegexMe::Configurations::Builder
7
+
8
+ def regex_me_to_search_ruby(options = {})
9
+ regex_me_to_search(:ruby, options)
10
+ end
11
+
12
+ def regex_me_to_search_mysql(options = {})
13
+ regex_me_to_search(:mysql, options)
14
+ end
15
+
16
+ private
17
+
18
+ def options_handler(options)
19
+ handled = \
20
+ {:case_insensitive => (options[:case_insensitive] ? Regexp::IGNORECASE : nil ),
21
+ :multiline => (options[:multiline] ? Regexp::MULTILINE : nil ),
22
+ :or => (options[:or] == false ? false : true)}
23
+
24
+ return options.merge(handled)
25
+ end
26
+
27
+ def regex_me_to_search(env, options)
28
+ return EMPTYs[env] if self.strip.empty?
29
+
30
+ execute_builder(env, options)
31
+ end
32
+
33
+ def execute_builder(env, options)
34
+ opt_handled = options_handler(options)
35
+
36
+ builder_result = builder(env, opt_handled)
37
+
38
+ case env
39
+ when :ruby
40
+ options = [opt_handled[:case_insensitive], opt_handled[:multiline]].compact
41
+ Regexp.new(builder_result, *options)
42
+ when :mysql
43
+ builder_result
44
+ end
45
+ end
46
+
47
+ def builder(border_to, options)
48
+ string = self
49
+
50
+ lcv = options[:latin_chars_variations]
51
+
52
+ if options[:exact_phrase]
53
+ @regexp = \
54
+ string \
55
+ .strip.simple_space \
56
+ .regex_latin_ci_list \
57
+ .gsub(/\s/, WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES) \
58
+ .regex_builder(:or => false,
59
+ :border => {:to => border_to,
60
+ :direction => :both})
61
+ else
62
+ @regexp = '('
63
+
64
+ for word in string.strip.split
65
+ if options[:exact_word]
66
+ @regexp << word.regex_builder(:border => {:to => border_to, :direction => :both}, :latin_chars_variations => lcv, :or => true)
67
+ elsif have_the_any_char?(word)
68
+ @regexp << word.regex_builder(:any => true, :border => border(border_to, word) , :latin_chars_variations => lcv, :or => true)
69
+ else
70
+ @regexp << word.regex_builder(:latin_chars_variations => lcv, :or => true)
71
+ end
72
+ end
73
+
74
+ @regexp = (@regexp << ')').sub!(/\|\)/,')')
75
+ end
76
+
77
+ return @regexp
78
+ end
79
+
80
+ def have_the_any_char?(string)
81
+ string.include?('*')
82
+ end
83
+
84
+ def border(to, word)
85
+ direction = nil
86
+
87
+ case word
88
+ when/^\*/
89
+ direction = :right
90
+ when /\*$/
91
+ direction = :left
92
+ when /^.*\*.*$/
93
+ direction = :both
94
+ end
95
+
96
+ {:to => to, :direction => direction}
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,10 @@
1
+ module RegexMe
2
+ module Configurations
3
+ module Builder
4
+ EMPTYs = {:ruby => //, :mysql => ''}
5
+ WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES = '[^0-9a-zA-Z\_]+'
6
+ end
7
+ end
8
+ end
9
+
10
+
@@ -0,0 +1,102 @@
1
+ # coding: utf-8
2
+
3
+ module StringUtilityBelt
4
+ module RegexMe
5
+ module Helper
6
+ A_VARIATIONS = "(a|à|á|â|ã|ä)"
7
+ E_VARIATIONS = "(e|è|é|ê|ë)"
8
+ I_VARIATIONS = "(i|ì|í|î|ï)"
9
+ O_VARIATIONS = "(o|ò|ó|ô|õ|ö)"
10
+ U_VARIATIONS = "(u|ù|ú|û|ü)"
11
+ C_VARIATIONS = "(c|ç)"
12
+ N_VARIATIONS = "(n|ñ)"
13
+
14
+ LATIN_CHARS_VARIATIONS = [A_VARIATIONS,
15
+ E_VARIATIONS,
16
+ I_VARIATIONS,
17
+ O_VARIATIONS,
18
+ U_VARIATIONS,
19
+ C_VARIATIONS,
20
+ N_VARIATIONS]
21
+
22
+ BORDER_TO = {
23
+ :ruby => {:left => '\b', :right => '\b' },
24
+ :mysql => {:left => '[[:<:]]', :right => '[[:>:]]' }
25
+ }
26
+
27
+ def regex_latin_ci_list
28
+ memo = ""
29
+
30
+ self.each_char do |char|
31
+ changed = false
32
+
33
+ for variations in LATIN_CHARS_VARIATIONS
34
+ variations_pattern = Regexp.new(variations, Regexp::IGNORECASE)
35
+
36
+ if char =~ variations_pattern
37
+ changed = true
38
+ memo.insert(-1, variations)
39
+ break
40
+ end
41
+ end
42
+
43
+ memo.insert(-1, char) unless changed
44
+ end
45
+
46
+ self.replace(memo)
47
+ end
48
+
49
+ def regex_builder(options)
50
+ if options[:any]
51
+ replace_the_any_char_per_any_pattern
52
+ end
53
+
54
+ if options[:latin_chars_variations]
55
+ replace_chars_includeds_in_latin_variation_list
56
+ end
57
+
58
+ if options[:border]
59
+ insert_border(options[:border])
60
+ end
61
+
62
+ if options[:or]
63
+ insert_OR
64
+ end
65
+
66
+ return self
67
+ end
68
+
69
+ private
70
+ def replace_the_any_char_per_any_pattern
71
+ self.gsub!(/\*/, '.*')
72
+ end
73
+
74
+ def replace_chars_includeds_in_latin_variation_list
75
+ self.regex_latin_ci_list
76
+ end
77
+
78
+ def insert_border(options)
79
+ border = BORDER_TO[options[:to]]
80
+
81
+ case options[:direction]
82
+ when :left
83
+ self.insert(0, border[:left])
84
+ when :right
85
+ self.insert(-1, border[:right])
86
+ when :both
87
+ self.insert(0, border[:left]).insert(-1, border[:right])
88
+ else
89
+ self
90
+ end
91
+ end
92
+
93
+ def insert_OR
94
+ self.insert(-1, "|")
95
+ end
96
+ end
97
+ end
98
+ end
99
+
100
+ class String
101
+ include StringUtilityBelt::RegexMe::Helper
102
+ end
@@ -0,0 +1,5 @@
1
+ require File.dirname(__FILE__) + '/lib/builders.rb'
2
+
3
+ class String
4
+ include RegexMe::Builders
5
+ end
@@ -1,3 +1,3 @@
1
1
  module StringUtilityBelt
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.1"
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_utility_belt
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 0
10
- version: 0.3.0
9
+ - 1
10
+ version: 0.3.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Rodrigo Serradura
@@ -15,10 +15,25 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-07-07 00:00:00 Z
19
- dependencies: []
20
-
21
- description: Adiciona novas funcionalidades para strings
18
+ date: 2011-07-12 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: htmlentities
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - "="
27
+ - !ruby/object:Gem::Version
28
+ hash: 51
29
+ segments:
30
+ - 4
31
+ - 3
32
+ - 0
33
+ version: 4.3.0
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ description: Adds new features for String objects.
22
37
  email:
23
38
  - rserradura@gmail.com
24
39
  executables: []
@@ -37,8 +52,11 @@ files:
37
52
  - lib/string_utility_belt/entities.rb
38
53
  - lib/string_utility_belt/general.rb
39
54
  - lib/string_utility_belt/match_rank.rb
40
- - lib/string_utility_belt/regex_me_helper.rb
41
- - lib/string_utility_belt/regex_me_to_search.rb
55
+ - lib/string_utility_belt/regex_me.rb
56
+ - lib/string_utility_belt/regex_me/lib/builders.rb
57
+ - lib/string_utility_belt/regex_me/lib/configurations.rb
58
+ - lib/string_utility_belt/regex_me/lib/helpers.rb
59
+ - lib/string_utility_belt/regex_me/regex_me.rb
42
60
  - lib/string_utility_belt/tags.rb
43
61
  - lib/string_utility_belt/version.rb
44
62
  - test/string_utility_belt/entities_test.rb
@@ -80,6 +98,6 @@ rubyforge_project: string_utility_belt
80
98
  rubygems_version: 1.8.2
81
99
  signing_key:
82
100
  specification_version: 3
83
- summary: Metodos uteis para strings
101
+ summary: Useful methods to handle strings
84
102
  test_files: []
85
103
 
@@ -1,100 +0,0 @@
1
- # coding: utf-8
2
-
3
- module RegexMe
4
- module Helper
5
- A_VARIATIONS = "(a|à|á|â|ã|ä)"
6
- E_VARIATIONS = "(e|è|é|ê|ë)"
7
- I_VARIATIONS = "(i|ì|í|î|ï)"
8
- O_VARIATIONS = "(o|ò|ó|ô|õ|ö)"
9
- U_VARIATIONS = "(u|ù|ú|û|ü)"
10
- C_VARIATIONS = "(c|ç)"
11
- N_VARIATIONS = "(n|ñ)"
12
-
13
- LATIN_CHARS_VARIATIONS = [A_VARIATIONS,
14
- E_VARIATIONS,
15
- I_VARIATIONS,
16
- O_VARIATIONS,
17
- U_VARIATIONS,
18
- C_VARIATIONS,
19
- N_VARIATIONS]
20
-
21
- BORDER_TO = {
22
- :ruby => {:left => '\b', :right => '\b' },
23
- :mysql => {:left => '[[:<:]]', :right => '[[:>:]]' }
24
- }
25
-
26
- def regex_latin_ci_list
27
- memo = ""
28
-
29
- self.each_char do |char|
30
- changed = false
31
-
32
- for variations in LATIN_CHARS_VARIATIONS
33
- variations_pattern = Regexp.new(variations, Regexp::IGNORECASE)
34
-
35
- if char =~ variations_pattern
36
- changed = true
37
- memo.insert(-1, variations)
38
- break
39
- end
40
- end
41
-
42
- memo.insert(-1, char) unless changed
43
- end
44
-
45
- self.replace(memo)
46
- end
47
-
48
- def regex_builder(options)
49
- if options[:any]
50
- replace_the_any_char_per_any_pattern
51
- end
52
-
53
- if options[:latin_chars_variations]
54
- replace_chars_includeds_in_latin_variation_list
55
- end
56
-
57
- if options[:border]
58
- insert_border(options[:border])
59
- end
60
-
61
- if options[:or]
62
- insert_OR
63
- end
64
-
65
- return self
66
- end
67
-
68
- private
69
- def replace_the_any_char_per_any_pattern
70
- self.gsub!(/\*/, '.*')
71
- end
72
-
73
- def replace_chars_includeds_in_latin_variation_list
74
- self.regex_latin_ci_list
75
- end
76
-
77
- def insert_border(options)
78
- border = BORDER_TO[options[:to]]
79
-
80
- case options[:direction]
81
- when :left
82
- self.insert(0, border[:left])
83
- when :right
84
- self.insert(-1, border[:right])
85
- when :both
86
- self.insert(0, border[:left]).insert(-1, border[:right])
87
- else
88
- self
89
- end
90
- end
91
-
92
- def insert_OR
93
- self.insert(-1, "|")
94
- end
95
- end
96
- end
97
-
98
- class String
99
- include RegexMe::Helper
100
- end
@@ -1,107 +0,0 @@
1
- require 'string_utility_belt/regex_me_helper'
2
-
3
- module StringUtilityBelt
4
- module RegexMe
5
- EMPTYs = {:ruby => //, :mysql => ''}
6
- WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES = '[^0-9a-zA-Z\_]+'
7
-
8
- module To
9
- module Search
10
- def regex_me_to_search_ruby(options = {})
11
- regex_me_to_search(:ruby, options)
12
- end
13
-
14
- def regex_me_to_search_mysql(options = {})
15
- regex_me_to_search(:mysql, options)
16
- end
17
-
18
- private
19
-
20
- def options_handler(options)
21
- handled = \
22
- {:case_insensitive => (options[:case_insensitive] ? Regexp::IGNORECASE : nil ),
23
- :multiline => (options[:multiline] ? Regexp::MULTILINE : nil ),
24
- :or => (options[:or] == false ? false : true)}
25
-
26
- return options.merge(handled)
27
- end
28
-
29
- def regex_me_to_search(env, options)
30
- return EMPTYs[env] if self.strip.empty?
31
-
32
- execute_builder(env, options)
33
- end
34
-
35
- def execute_builder(env, options)
36
- opt_handled = options_handler(options)
37
-
38
- builder_result = builder(env, opt_handled)
39
-
40
- case env
41
- when :ruby
42
- options = [opt_handled[:case_insensitive], opt_handled[:multiline]].compact
43
- Regexp.new(builder_result, *options)
44
- when :mysql
45
- builder_result
46
- end
47
- end
48
-
49
- def builder(border_to, options)
50
- string = self
51
-
52
- lcv = options[:latin_chars_variations]
53
-
54
- if options[:exact_phrase]
55
- @regexp = \
56
- string \
57
- .strip.simple_space \
58
- .regex_latin_ci_list \
59
- .gsub(/\s/, WORDS_INTERVAL_PATTERN_FOR_EXACT_PHRASES) \
60
- .regex_builder(:or => false,
61
- :border => {:to => border_to,
62
- :direction => :both})
63
- else
64
- @regexp = '('
65
-
66
- for word in string.strip.split
67
- if options[:exact_word]
68
- @regexp << word.regex_builder(:border => {:to => border_to, :direction => :both}, :latin_chars_variations => lcv, :or => true)
69
- elsif have_the_any_char?(word)
70
- @regexp << word.regex_builder(:any => true, :border => border(border_to, word) , :latin_chars_variations => lcv, :or => true)
71
- else
72
- @regexp << word.regex_builder(:latin_chars_variations => lcv, :or => true)
73
- end
74
- end
75
-
76
- @regexp = (@regexp << ')').sub!(/\|\)/,')')
77
- end
78
-
79
- return @regexp
80
- end
81
-
82
- def have_the_any_char?(string)
83
- string.include?('*')
84
- end
85
-
86
- def border(to, word)
87
- direction = nil
88
-
89
- case word
90
- when/^\*/
91
- direction = :right
92
- when /\*$/
93
- direction = :left
94
- when /^.*\*.*$/
95
- direction = :both
96
- end
97
-
98
- {:to => to, :direction => direction}
99
- end
100
- end
101
- end
102
- end
103
- end
104
-
105
- class String
106
- include StringUtilityBelt::RegexMe::To::Search
107
- end