fk_str 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
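
For orientation, a minimal usage sketch of the gem's public helpers (not part of the published files; the expected return values are copied verbatim from the bundled test suite shown further below):

    require 'fk_str'

    FkStr.to_slug('São Paulo/SP')                             # => "sao-paulo-sp"
    FkStr.remove_accents('São José do Rio Preto - SP')        # => "Sao Jose do Rio Preto - SP"
    FkStr.upcasewords('CHARLIE BROWN JR.')                    # => "Charlie Brown Jr."
    FkStr.is_eq('Armagedon + Atos de Vingança', 'Armagedom')  # => true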
data/LICENSE ADDED
@@ -0,0 +1,20 @@
+ Copyright (c) 2012 Guilherme Baptista
+ https://github.com/gbaptista/fk_str
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
+ require 'rake/testtask'
+
+ Rake::TestTask.new do |t|
+ t.libs << 'test'
+ end
+
+ desc 'Run tests'
+ task :default => :test
data/lib/fk_str.rb ADDED
@@ -0,0 +1,497 @@
+ # encoding: utf-8
+
+ require 'date'
+ require 'fk_str/dictionary'
+
+ module FkStr
+
+ def self.treat_encoding str, debug=false
+ str_r = ''
+ str.lines.each_with_index { |l, i| str_r += ' ' + self.treat_encoding_s(l, debug) if !debug or (i > -1 and i < 1) }
+ return str_r.strip
+ end
+
+ def self.is_eq str, str_b, pct=1
+
+ str = self.to_term str, true
+ str_b = self.to_term str_b, true
+
+ str_c = str.concat str_b
+
+ return true if (100-(100*str_c.uniq.size/str_c.size)) >= pct
+
+ return false
+
+ end
+
+ def self.to_slug str
+
+ return str if str.to_s == ''
+
+ return self.remove_accents(str).gsub(/\s{1,}| {1,}/, ' ').gsub(/[\+\/_\-|:@#\\,]/, ' ').gsub('&', 'e').gsub(/[^a-zA-Z0-9 ]/, '').downcase.gsub(/\s{1,}| {1,}/, ' ').strip.gsub(' ', '-')
+
+ end
+
+ def self.to_term str, ar=false
+
+ return str if str.to_s == ''
+
+ str_ar = []
+
+ self.to_slug(str).split('-').each do |s|
+ s.split('').uniq.each { |r| s = s.gsub /#{r}{2,}/, r }
+ @@simple_downcase_consonants.each { |c| s = s.gsub /#{c}(h|r|l|u)/, c }
+ if !s.empty? and !@@countries_acronyms.include? s and !@@articles_and_others.include? s
+ s = s.gsub /m/, 'n'
+ s = s.gsub /l/, 'r'
+ s = s.gsub /z/, 's'
+ s = s.gsub /g/, 'j'
+ s = s.gsub /e|y/, 'i'
+ s = s.gsub /o|w/, 'u'
+ s = s.gsub /c|q/, 'k'
+ s.split('').uniq.each { |r| s = s.gsub /#{r}{2,}/, r }
+ s = s.gsub /(r|s|n)$/, ''
+ str_ar << s if !s.empty?
+ end
+ end
+
+ return str_ar.uniq if ar
+
+ return str_ar.uniq.join
+
+ end
+
+ def self.remove_accents str
+
+ return '' if str.to_s == ''
+ str = str.gsub(/[ÁÃÂÀÄĂĀÅÆ]/, 'A').gsub(/[áãâàäăāåæ]/, 'a')
+ str = str.gsub(/[ÉẼÊÈËĔĒ]/, 'E').gsub(/[éẽêèëĕē]/, 'e')
+ str = str.gsub(/[ÍĨÎÌÏĬĪ]/, 'I').gsub(/[íĩîìïĭī]/, 'i')
+ str = str.gsub(/[ÓÕÔÒÖŎŌŐÐ]/, 'O').gsub(/[óõôòöŏōőð]/, 'o')
+ str = str.gsub(/[ÚŨÛÙÜŬŪǕ]/, 'U').gsub(/[úũûùüŭūǖ]/, 'u')
+ str = str.gsub(/[ÇČ]/, 'C').gsub(/[çč]/, 'c').gsub(/Ğ/, 'G').gsub(/ğ/, 'g').gsub(/Ñ/, 'N').gsub(/ñ/, 'n').gsub(/Š/, 'S').gsub(/š/, 's')
+ str = str.gsub(/[ȲŸÝỲ]/, 'Y').gsub(/[ȳÿýỳ]/, 'y').gsub(/Ž/, 'Z').gsub(/ž/, 'z')
+
+ return str
+
+ end
+
+ # 35 seconds
+ # 18 seconds
+ # 16 seconds
+ def self.upcasewords str
+
+ return str if str.to_s == ''
+
+ # Normalize duplicated or invalid whitespace.
+ str = str.gsub(/\s{1,}| {1,}/, ' ').strip
+
+ rstr = []
+ str.split(' ').each { |w| rstr << upcaseword(w) }
+ str = rstr.join(' ')
+
+ # Normalize duplicated or invalid whitespace.
+ str = str.gsub(/\s{1,}| {1,}/, ' ')
+
+ # Uppercase the first letter
+ fl = @@letters_by_letter[remove_accents(str[0]).downcase]
+ fl.each { |l| str[0] = str[0].gsub(l[0], l[1]) } if fl
+
+ return str
+
+ end
+
+ def self.remove_if_ends_with str, texts, not_change_if_returns_with=nil, if_not_change_returns_with_last_removed=0
+
+ return str if str.split(' ').size == 1
+
+ texts.each_with_index { |t, i| texts.delete_at i if t == '' }
+
+ str_o = str
+
+ str = str.strip
+
+ str_t = self.remove_accents(str).downcase
+
+ texts = texts.uniq
+
+ texts.each_with_index { |v, i| texts[i] = self.remove_accents(v).downcase }
+
+ not_change_if_returns_with.each_with_index { |v, i| not_change_if_returns_with[i] = self.remove_accents(v).downcase } if !not_change_if_returns_with.nil?
+
+ removed = []
+
+ continue = true
+ while continue
+ continue = false
+ texts.each do |t|
+
+ # If the end of the string matches the term...
+ if t == str_t[str_t.size-t.size..str_t.size].to_s
+
+ # If what comes right before the final term is not ' de ' or ' da '...
+ if ![' de ', ' da '].include? str_t[str_t.size-t.size-4].to_s + str_t[str_t.size-t.size-3..str_t.size-t.size-2].to_s + str_t[str_t.size-t.size-1].to_s
+
+ # If the first char of the term is not a letter, or the char right before the term is not a letter...
+ if (!@@simple_downcase_letters.include? t[0] or !@@simple_downcase_letters.include? str_t[str_t.size-t.size-1]) and str_t.size > 1
+
+ str_l = str
+
+ str = str[0..str.size-t.size-1].strip
+ str_t = self.remove_accents(str).downcase
+
+ removed << str_l[str.size..str_l.size]
+
+ continue = true
+
+ end
+
+ end
+
+ end
+
+ end
+ end
+
+ # If the result matches a condition that must not be returned...
+ if !not_change_if_returns_with.nil?
+ if not_change_if_returns_with.include?(self.remove_accents(str).downcase)
+ # If asked to return with only the last x removed terms appended back...
+ if if_not_change_returns_with_last_removed > 0
+ removed = removed.reverse
+ (1..if_not_change_returns_with_last_removed).each { |n| str += removed[n-1].to_s }
+ return str.strip
+ end
+ return str_o
+ end
+ end
+
+ return str
+
+ end
+
+ def self.extract_dates str, reference_date=Time.now, reverse_month_day=false
+
+ return [] if str.nil?
+
+ return [Time.new(str.year, str.month, str.day)] if str.kind_of?(Time) or str.kind_of?(Date) or str.kind_of?(DateTime)
+
+ o_str = str
+
+ years = []
+ (-30..20).each { |y| years << reference_date.year+y }
+
+ begin
+
+ str = str.gsub /[0-9]{1,}(º|ª)/, ' '
+
+ str = self.remove_accents str
+
+ str = str.downcase
+
+ str = str.gsub /[0-9]{1,}+[a-z]{1,}+[0-9]{1,}/, ''
+ str = str.gsub /[0-9]{1,}+[a-z]{1,}/, ' '
+ str = str.gsub /[a-z]{1,}+[0-9]{1,}/, ' '
+
+ str = str.gsub(/[^a-z|^0-9|^\/|^\-|^\.|^:]/i, ' ')
+
+ str = str.gsub(/[0-9]{1,}:[0-9]{1,}|:[0-9]{1,}|[0-9]{1,}h[0-9]{1,}|[0-9]{1,}%|[0-9]{1,}h |[0-9]{1,}h$|palco [0-9]{1,}/i, '')
+
+ str.scan(/[0-9]{1,}+.+[0-9]{1,}/).each { |d| str = str.gsub(d, d.gsub('.', '/')) }
+
+ if reverse_month_day
+ str.scan(/[0-9]{1,}\/[0-9]{1,}/).each do |d|
+ str = str.gsub(d, d.split('/')[1] + '/' + d.split('/')[0])
+ end
+ end
+
+ @@months_strs.each do |mc|
+ str.scan(/#{mc.first}.*[0-9]{1,2}+[1-9]{2,4}/).each do |md|
+ if md.scan(/[0-9]{1,2}/).size < 4 and md.scan(/[0-9]{4,}/).size < (md.scan(/[0-9]{2,2}/).size-1)
+
+ continue = true
+
+ @@months_strs.each do |smc|
+ md.scan(/[0-9].*#{smc.first}/).each do |d|
+ continue = false
+ end
+ end
+ if continue
+ m = md.scan(/[0-9]{1,2}/).first
+ str = str.gsub(/#{mc.first}.+#{m}/, "#{m} #{mc.first}").gsub(',', '')
+ end
+ end
+ end
+ end
+
+ str.scan(/[0-9]{4,4}-[0-9]{1,2}-[0-9]{1,2}/).each do |y|
+ str = str.gsub(y, y.split('-')[2] + '/' + y.split('-')[1] + '/' + y.split('-')[0])
+ end
+
+ str.scan(/[0-9]{4,4}\/[0-9]{1,2}\/[0-9]{1,2}/).each do |y|
+ str = str.gsub(y, y.split('/')[2] + '/' + y.split('/')[1] + '/' + y.split('/')[0])
+ end
+
+ str.scan(/[0-9]{1,2}-[0-9]{1,2}-[0-9]{2,4}/).each do |y|
+ str = str.gsub(y, y.split('-')[0] + '/' + y.split('-')[1] + '/' + y.split('-')[2])
+ end
+
+ str.scan(/[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{1,}/).each do |y|
+ if y.split('/')[2].size < 4
+ sr = y.split('/').first + '/' + y.split('/')[1]
+ sy = y.split('/')[2]
+ if sy.size < 3
+ sy = '0' + sy if sy.size == 1
+ if years.include? (reference_date.year.to_s[0..1]+sy).to_i
+ sr += '/' + reference_date.year.to_s[0..1]+sy
+ elsif years.include? ((reference_date.year-100).to_s[0..1]+sy).to_i
+ sr += '/' + (reference_date.year-100).to_s[0..1]+sy
+ end
+ end
+ str = str.gsub(y, sr)
+ end
+ end
+
+ str = str.gsub(/[0-9]{5,}/, '')
+
+ dates = []
+ continue = true
+ while continue
+
+ @@months_strs.each do |m|
+
+ str.scan(/([0-9].*#{m.first})+([^0-9]|$)/).each do |d|
+ days = d.first.split(/(#{m.first})+([^0-9]|$)/).first
+ jump=false
+ @@months_strs.each do |mc|
+ if days.scan(/([0-9].*#{mc.first})+([^0-9]|$)/).size > 0
+ jump = true
+ end
+ end
+ if !jump
+
+ year = nil
+ str.scan(/#{days}#{m.first}.*[0-9]{4,4}/).each do |sc|
+ sy = sc.gsub(/(#{days}#{m.first})+([^0-9]|$)/, '')
+
+ # [lorem 9/jan/2012] = false
+ # [2012 e 07/05/2012] = true
+ # [2012] = true
+
+ if sy.scan(/[0-9]{4,4}/).size > 1 or (sy.scan(/[0-9]{4,4}/).size == 1 and !sy.gsub(/[0-9]{4,4}/, '').match(/[0,9]/))
+ sy.scan(/[0-9]{4,4}/).each { |y| year=y.to_i if years.include? y.to_i; break; }
+ end
+ end
+
+ #puts '[' + str + '] => ' + year.inspect
+ str = str.gsub(/(#{days}#{m.first})+([^0-9]|$)/, '')
+ #puts '[' + str + "\n\n"
+
+ days.gsub(/[0-9]{4,4}/, '').scan(/[0-9]{1,2}/).each do |day|
+ day = day.to_i
+ if day > 0 and day < 32
+ if year
+ dates<<Time.new(year, m[1], day)
+ elsif m[1]<(reference_date.month-3)
+ dates<<Time.new(reference_date.year+1, m[1], day)
+ else
+ dates<<Time.new(reference_date.year, m[1], day)
+ end
+ end
+ end
+ end
+ end
+ end
+ continue = false
+ @@months_strs.each do |mt|
+ if str.scan(/([0-9].*#{mt.first})+([^0-9]|$)/).size > 0
+ continue = true
+ end
+ end
+ end
+
+ return dates.uniq.sort
+
+ rescue => exc
+ return []
+ end
+
+ end
+
+ def self.extract_time str, date=nil, reference_time=Time.now
+
+ return nil if date.nil?
+
+ return Time.new(date.year, date.month, date.day, reference_time.hour, reference_time.min) if str.nil? or !str.match /[0-9]{1,2}:[0-9]{1,2}/
+
+ begin
+ time = str.scan(/[0-9]{1,2}:[0-9]{1,2}/).first.split(':')
+ return Time.new(date.year, date.month, date.day, time[0], time[1])
+ rescue => exp
+ return Time.new(date.year, date.month, date.day, reference_time.hour, reference_time.min)
+ end
+
+ end
+
+ private
+
+ def self.treat_encoding_s str, debug=false
+ begin
+ str_r = ''
+ ws = str.split(' ').each_slice(20)
+ ws.each_with_index do |w, i|
+ if i == 0
+ str_r += self.treat_encoding_i w.join(' '), 0, debug
+ else
+ str_r += ' ' + self.treat_encoding_i(w.join(' '), 0, debug)
+ end
+ end
+ rescue => exp
+ str_r = ''
+ str.chars.each_slice(200).each { |w| str_r += self.treat_encoding_i w.join, 0, debug }
+ end
+
+ return str_r
+
+ end
+
+ def self.valid_encoding str, tolerance=0, debug=false
+ str_v = str
+ begin
+ str_v.match 'á'
+ str_v = str_v.gsub /\s{1,}|\n{1,}|\r{1,}/, ''
+ @@legal_chars.each { |lc| str_v = str_v.gsub lc, '' }
+ @@invalid_sequences.each { |is| raise 'invalid sequence: ' + is if str.match is }
+ puts '[' + str_v + ']' if debug and str_v.size > 0
+ return false if str_v.size > tolerance
+ str_v.split('').each { |c| str = str.gsub c, '' } if str_v.size > 0
+ return str
+ rescue => exp
+ #puts '[error] ' + exp.message if debug or !exp.message.match /incompatible encoding|invalid byte sequence|invalid sequence/i
+ return false
+ end
+ end
+
+ def self.treat_encoding_i str, tolerance=0, debug=false
+
+ str_t = str
+
+ str_v = self.valid_encoding str_t, tolerance, debug
+ if !str_v
+ puts '[try force_encoding UTF-8]' if debug
+ begin
+ str_t = str.force_encoding 'UTF-8'
+ rescue => exp
+ end
+ else
+ return str_v
+ end
+
+ str_v = self.valid_encoding str_t, tolerance, debug
+ if !str_v
+ puts '[try WINDOWS-1252]' if debug
+ begin
+ str_t = str.encode 'UTF-8', 'WINDOWS-1252'
+ rescue => exp
+ end
+ else
+ return str_v
+ end
+
+ str_v = self.valid_encoding str_t, tolerance, debug
+ if !str_v
+ puts '[try UTF-8]' if debug
+ begin
+ str_t = str.encode 'UTF-8', 'UTF-8'
+ rescue => exp
+ end
+ else
+ return str_v
+ end
+
+ str_v = self.valid_encoding str_t, tolerance, debug
+ if !str_v
+ puts '[try ISO-8859-2]' if debug
+ begin
+ str_t = str.encode 'UTF-8', 'ISO-8859-2'
+ rescue => exp
+ end
+ else
+ return str_v
+ end
+
+ str_v = self.valid_encoding str_t, tolerance, debug
+ if !str_v
+ puts '[try ISO-8859-3]' if debug
+ begin
+ str_t = str.encode 'UTF-8', 'ISO-8859-3'
+ rescue => exp
+ end
+ else
+ return str_v
+ end
+
+ str_v = self.valid_encoding str_t, tolerance, debug
+ if tolerance == 0 and !str_v
+ str_t = self.treat_encoding_i str, 1, debug
+ end
+
+ return str_t
+
+ end
+
+ def self.upcaseword w
+
+ return w if w.to_s == ''
+
+ if w.scan(/#{@@separators_regex.join('|')}/).size == 0
+
+ # Build an Array with only the characters that are needed, for performance reasons.
+ letters = []
+ clean_word = self.remove_accents(w).downcase.gsub(/[^a-z]/, '')
+ clean_word.split('').uniq.each { |lt| @@letters_by_letter[lt].each { |l| letters << l } }
+
+ trf = 'tm'
+ trf = 'tfu' if w.size > 5 or !@@articles_and_others.include? clean_word
+ trf = 'tau' if !w.match(/^mr$|^jr$|^mr.$|^jr.$|^sr$|^sr.$/i) and ((w.size < 6 and clean_word.match(/[^aeiouwy]{4,}|[aeiouwy]{4,}|^[^aeiouwy]{2,3}$/)) or w.scan('.').size > 2)
+
+ letters.each do |l|
+
+ # Downcase everything.
+ w = w.gsub l[1], l[0] if trf == 'tm' || trf == 'tfu'
+
+ # Uppercase the first letter unless the word is an article or something similar.
+ w = w.gsub /^#{l[0]}/, l[1] if trf == 'tfu'
+
+ # Uppercase the whole word when it has:
+ # * A sequence of 4 or more consonants.
+ # * A sequence of 4 or more vowels.
+ # * An exact sequence of 2 or 3 vowels.
+
+ w = w.gsub l[0], l[1] if trf == 'tau'
+
+ end
+
+ else
+
+ # Split terms on separator characters such as "'", "(", etc.
+ @@separators.each do |l|
+ sw = w.split(l)
+ if sw.size > 1
+ # Handle the term on its own unless it is a single letter before "'"
+ sw.each_with_index { |v, i| sw[i] = upcaseword v if !(["'"].include? l and v.size == 1 and i == 0) }
+ if w[w.size-1] == l
+ w = sw.join(l) + l
+ else
+ w = sw.join(l)
+ end
+ end
+ end
+
+ end
+
+ return w
+
+ end
+
+ end
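
The two date helpers above are meant to be composed: extract_dates finds candidate dates relative to a reference date, and extract_time attaches a time of day to one of them. A minimal sketch, not part of the published files, mirroring calls from the test suite below (the reference date Time.new(2012, 5, 28) comes from those tests):

    require 'fk_str'

    raw  = 'Thu, 14 Jul 2011 22:18:49 +0000'
    date = FkStr.extract_dates(raw, Time.new(2012, 5, 28)).first  # => 2011-07-14
    FkStr.extract_time(raw, date, Time.new(2012, 5, 28))          # => Time.new(2011, 7, 14, 22, 18, 0)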
data/lib/fk_str/dictionary.rb ADDED
@@ -0,0 +1,106 @@
+ # encoding: utf-8
+
+ module FkStr
+
+ @@months_strs = {
+ 'jan' => 1, 'fev' => 2, 'mar' => 3, 'abr' => 4, 'mai' => 5, 'jun' => 6,
+ 'jul' => 7, 'ago' => 8, 'set' => 9, 'out' => 10, 'nov' => 11, 'dez' => 12,
+ '/1' => 1, '/2' => 2, '/3' => 3, '/4' => 4,
+ '/5' => 5, '/6' => 6, '/7' => 7, '/8' => 8, '/9' => 9,
+ '/01' => 1, '/02' => 2, '/03' => 3, '/04' => 4, '/05' => 5, '/06' => 6,
+ '/07' => 7, '/08' => 8, '/09' => 9, '/10' => 10, '/11' => 11, '/12' => 12,
+ 'feb' => 2, 'apr' => 4, 'may' => 5, 'aug' => 8, 'sep' => 9, 'oct' => 10, 'dec' => 12
+ }
+
+ @@letters_by_letter = {
+ 'a' => { 'a' => 'A', 'á' => 'Á', 'ã' => 'Ã', 'â' => 'Â', 'à' => 'À', 'ä' => 'Ä', 'ă' => 'Ă', 'ā' => 'Ā', 'å' => 'Å', 'æ' => 'Æ' },
+ 'b' => { 'b' => 'B' },
+ 'c' => { 'c' => 'C', 'ç' => 'Ç', 'č' => 'Č' },
+ 'd' => { 'd' => 'D' },
+ 'e' => { 'e' => 'E', 'é' => 'É', 'ẽ' => 'Ẽ', 'ê' => 'Ê', 'è' => 'È', 'ë' => 'Ë', 'ĕ' => 'Ĕ', 'ē' => 'Ē' },
+ 'f' => { 'f' => 'F' },
+ 'g' => { 'g' => 'G', 'ğ' => 'Ğ' },
+ 'h' => { 'h' => 'H' },
+ 'i' => { 'i' => 'I', 'í' => 'Í', 'ĩ' => 'Ĩ', 'î' => 'Î', 'ì' => 'Ì', 'ï' => 'Ï', 'ĭ' => 'Ĭ', 'ī' => 'Ī' },
+ 'j' => { 'j' => 'J' },
+ 'k' => { 'k' => 'K' },
+ 'l' => { 'l' => 'L' },
+ 'm' => { 'm' => 'M' },
+ 'n' => { 'n' => 'N', 'ñ' => 'Ñ' },
+ 'o' => { 'o' => 'O', 'ó' => 'Ó', 'õ' => 'Õ', 'ô' => 'Ô', 'ò' => 'Ò', 'ö' => 'Ö', 'ŏ' => 'Ŏ', 'ō' => 'Ō', 'ő' => 'Ő', 'ð' => 'Ð' },
+ 'p' => { 'p' => 'P' },
+ 'q' => { 'q' => 'Q' },
+ 'r' => { 'r' => 'R' },
+ 's' => { 's' => 'S', 'š' => 'Š' },
+ 't' => { 't' => 'T' },
+ 'u' => { 'u' => 'U', 'ú' => 'Ú', 'ũ' => 'Ũ', 'û' => 'Û', 'ù' => 'Ù', 'ü' => 'Ü', 'ŭ' => 'Ŭ', 'ū' => 'Ū', 'ǖ' => 'Ǖ' },
+ 'v' => { 'v' => 'V' },
+ 'w' => { 'w' => 'W' },
+ 'x' => { 'x' => 'X' },
+ 'y' => { 'y' => 'Y', 'ȳ' => 'Ȳ', 'ÿ' => 'Ÿ', 'ý' => 'Ý', 'ỳ' => 'Ỳ' },
+ 'z' => { 'z' => 'Z', 'ž' => 'Ž' }
+ }
+
+ @@articles_and_others = [
+ # Portuguese
+ 'a', 'ao', 'aos', 'as',
+ 'co', 'coa', 'coas', 'com', 'cos',
+ 'da', 'das', 'de', 'do', 'dos', 'dum', 'duma', 'dumas', 'duns',
+ 'e', 'em',
+ 'na', 'nas', 'no', 'nos', 'num', 'numa', 'numas', 'nuns',
+ 'o', 'os', 'ou',
+ 'pela', 'pelas', 'pelo', 'pelos', 'per', 'por',
+ 'um', 'uma', 'umas', 'uns',
+ # English
+ 'an', 'and', 'at', 'by', 'in', 'of', 'or', 'on', 's', 'the'
+ ]
+ def FkStr.articles_and_others
+ return @@articles_and_others
+ end
+
+ @@countries_acronyms = [
+ # Brazil
+ 'br',
+ 'ac', 'al', 'ap', 'am', 'ba', 'ce', 'df', 'es', 'go', 'ma', 'mt','ms', 'mg', 'pa', 'pb',
+ 'pr', 'pe', 'pi', 'rj', 'rn', 'rs', 'ro', 'rr', 'sc', 'sp', 'se', 'to',
+ # USA
+ 'us',
+ 'ak', 'al', 'ar', 'az', 'ca', 'co', 'ct', 'dc', 'de', 'fl', 'ga', 'hi', 'ia', 'id', 'il',
+ 'in', 'ks', 'ky', 'la', 'ma', 'md', 'me', 'mi', 'mn', 'mo', 'ms', 'mt', 'nc', 'nd', 'ne',
+ 'nh', 'nj', 'nm', 'nv', 'ny', 'oh', 'ok', 'or', 'pa', 'ri', 'sc', 'sd', 'tn', 'tx', 'ut',
+ 'va', 'vt', 'wa', 'wi', 'wv', 'wy'
+ ]
+ def FkStr.countries_acronyms
+ return @@countries_acronyms
+ end
+
+ @@simple_downcase_letters = [
+ 'a', 'b', 'c', 'd', 'e',
+ 'f', 'g', 'h', 'i', 'j',
+ 'k', 'l', 'm', 'n', 'o',
+ 'p', 'q', 'r', 's', 't',
+ 'u', 'v', 'w', 'x', 'y',
+ 'z'
+ ]
+
+ @@simple_downcase_consonants = [
+ 'b', 'c', 'd',
+ 'f', 'g', 'h', 'j',
+ 'k', 'l', 'm', 'n',
+ 'p', 'q', 'r', 's', 't',
+ 'v', 'w', 'x', 'y',
+ 'z'
+ ]
+
+ @@separators = ['/', '-', '_', ',', '.', "'", '"', '(', ')', '[', ']', '{', '}', '|', '\\', ';']
+ def FkStr.separators
+ return @@separators
+ end
+
+ @@separators_regex = ['\/', '\-', "\'", '\"', '\(', '\)', '\[', '\]', '\{', '\}']
+
+ @@legal_chars = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz _-!@#$%&*+=?^~´`,.:;\'"()[]{}|/\\<>ÁÃÂÀÄĂĀÅÆáãâàäăāåæÉẼÊÈËĔĒéẽêèëĕēÍĨÎÌÏĬĪíĩîìïĭīÓÕÔÒÖŎŌŐÐóõôòöŏōőðšŠÚŨÛÙÜŬŪǕúũûùüŭūǖÇçČčĞğÑñȲȳŸÿÝýỲỳŽž¹²³ºª – ’©®℗¿¡±“”•«»‘°'.split('')
+
+ @@invalid_sequences = ['é']
+
+ end
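
The dictionaries above feed FkStr.to_term, which reduces each word to a rough phonetic key after skipping articles and state acronyms. A short sketch, not part of the published files, with return values copied from the test suite below:

    require 'fk_str'

    FkStr.to_term('São Paulo-SP')                     # => "saupauru"  ('sp' is filtered out as a state acronym)
    FkStr.to_term('casa & dog and cachorro e lorem')  # => "kasadujkakurururi"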
data/test/test_fk_str.rb ADDED
@@ -0,0 +1,255 @@
+ # encoding: utf-8
+
+ require 'test/unit'
+ require 'fk_str'
+
+ class FkStrTest < Test::Unit::TestCase
+
+ def test_treat_encoding
+
+ assert_equal(
+ 'çUTF-8',
+ FkStr.treat_encoding("\xE7")+FkStr.treat_encoding("\xE7").encoding.to_s.upcase
+ )
+
+ assert_equal(
+ '©UTF-8',
+ FkStr.treat_encoding("\xC2\xA9")+FkStr.treat_encoding("\xC2\xA9").encoding.to_s.upcase
+ )
+
+ assert_equal(
+ 'caçaUTF-8',
+ FkStr.treat_encoding("ca\xE7a")+FkStr.treat_encoding("ca\xE7a").encoding.to_s.upcase
+ )
+
+ assert_equal(
+ 'casaUTF-8',
+ FkStr.treat_encoding('casa')+FkStr.treat_encoding('casa').encoding.to_s.upcase
+ )
+
+ end
+
+ def test_is_eq
+
+ assert_equal(
+ true,
+ FkStr.is_eq('Hangar 110', 'Hangar 110', 40)
+ )
+
+ assert_equal(
+ true,
+ FkStr.is_eq('Armagedon + Atos de Vingança', 'Armagedom')
+ )
+
+ assert_equal(
+ false,
+ FkStr.is_eq('Gato Cat', 'Cachorro Dog')
+ )
+
+ assert_equal(
+ true,
+ FkStr.is_eq('Creedence Clearwater Revisited', 'Creedence Clearwater')
+ )
+
+ end
+
+ def test_to_slug
+
+ assert_equal(
+ 'teste-dog',
+ FkStr.to_slug('teste:dog')
+ )
+
+ assert_equal(
+ 'centro-rio-de-janeiro-rj',
+ FkStr.to_slug('Centro - Rio de Janeiro [RJ]')
+ )
+
+ assert_equal(
+ 'sao-paulo-sp',
+ FkStr.to_slug('São Paulo/SP')
+ )
+
+ assert_equal(
+ 'sao-paulo-sp',
+ FkStr.to_slug('São Paulo_SP')
+ )
+
+ end
+
+ def test_to_term
+
+ assert_equal(
+ 'kasadujkakurururi',
+ FkStr.to_term('casa & dog and cachorro e lorem')
+ )
+
+ assert_equal(
+ 'tistiduj',
+ FkStr.to_term('teste:de\dog')
+ )
+
+ assert_equal(
+ 'saupauru',
+ FkStr.to_term('São Paulo-SP')
+ )
+
+ assert_equal(
+ 'tistiduj',
+ FkStr.to_term('teste:de:dog')
+ )
+
+ end
+
+ def test_remove_accents
+
+ assert_equal(
+ 'Sao Jose do Rio Preto - SP',
+ FkStr.remove_accents('São José do Rio Preto - SP')
+ )
+
+ assert_equal(
+ 'Sao Paulo',
+ FkStr.remove_accents('São Paulo')
+ )
+
+ assert_equal(
+ 'Acougue',
+ FkStr.remove_accents('Açougue')
+ )
+
+ assert_equal(
+ 'Lorem Ipsum',
+ FkStr.remove_accents('Lôrém Ipsum')
+ )
+
+ end
+
+ def test_upcasewords
+
+ assert_equal(
+ 'Charlie Brown Jr.',
+ FkStr.upcasewords('CHARLIE BROWN JR.')
+ )
+
+ assert_equal(
+ 'Coldplay',
+ FkStr.upcasewords('COLDPLAY')
+ )
+
+ assert_equal(
+ 'Queensrÿche',
+ FkStr.upcasewords('QUEENSRŸCHE')
+ )
+
+ assert_equal(
+ 'Mindflow',
+ FkStr.upcasewords('MINDFLOW')
+ )
+
+ end
+
+ def test_remove_if_ends_with
+
+ assert_equal(
+ 'Natal La Barra',
+ FkStr.remove_if_ends_with(
+ 'Natal La Barra - Caxias do Sul / RS',
+ ['La Barra', 'Caxias do Sul', 'RS', '/', '-'],
+ ['Natal'],
+ 1
+ )
+ )
+
+ assert_equal(
+ 'Natal La Barra -',
+ FkStr.remove_if_ends_with(
+ 'Natal La Barra - Caxias do Sul / RS',
+ ['La Barra', 'Caxias do Sul', 'RS', '/', '-'],
+ ['Natal'],
+ 2
+ )
+ )
+
+ assert_equal(
+ 'Natal La Barra - Caxias do Sul',
+ FkStr.remove_if_ends_with(
+ 'Natal La Barra - Caxias do Sul / RS',
+ ['La Barra', 'Caxias do Sul', 'RS', '/', '-'],
+ ['Natal'],
+ 3
+ )
+ )
+
+ assert_equal(
+ 'Masp',
+ FkStr.remove_if_ends_with('Masp São Paulo/SP', ['São Paulo', 'SP', '/'])
+ )
+
+ end
+
+ def test_extract_dates
+
+ assert_equal(
+ [Time.new(2012, 12, 6)].uniq.sort,
+ FkStr.extract_dates('December 06, 2012', Time.new(2012, 9, 12))
+ )
+
+ assert_equal(
+ [Time.new(2012, 9, 14)].uniq.sort,
+ FkStr.extract_dates('FRI 09.14.2012', Time.new(2012, 9, 12), true)
+ )
+
+ assert_equal(
+ [Time.new(2011, 12, 8), Time.new(2012, 1, 9)].uniq.sort,
+ FkStr.extract_dates('8/dez lorem 9/jan/2012', Time.new(2011, 10, 8))
+ )
+
+ assert_equal(
+ [Time.new(2012, 1, 2)].uniq.sort,
+ FkStr.extract_dates('2 de janeiro', Time.new(2011, 10, 8))
+ )
+
+ end
+
+ def test_extract_time
+
+ assert_equal(
+ Time.new(2011, 07, 14, 22, 18, 0),
+ FkStr.extract_time(
+ 'Thu, 14 Jul 2011 22:18:49 +0000',
+ FkStr.extract_dates('Thu, 14 Jul 2011 22:18:49 +0000',Time.new(2012, 5, 28)).first,
+ Time.new(2012, 5, 28)
+ )
+ )
+
+ assert_equal(
+ Time.new(2011, 07, 14, 16, 15, 0),
+ FkStr.extract_time(
+ '14 Jul 2011 16:15',
+ FkStr.extract_dates('14 Jul 2011 16:15',Time.new(2012, 5, 28)).first,
+ Time.new(2012, 5, 28)
+ )
+ )
+
+ assert_equal(
+ Time.new(2011, 07, 14, 9, 0, 0),
+ FkStr.extract_time(
+ '14 Jul 2011 9:00',
+ FkStr.extract_dates('14 Jul 2011 9:00',Time.new(2012, 5, 28)).first,
+ Time.new(2012, 5, 28)
+ )
+ )
+
+ assert_equal(
+ Time.new(2011, 07, 14, 7, 35, 0),
+ FkStr.extract_time(
+ '14 Jul 2011 07:35',
+ FkStr.extract_dates('14 Jul 2011 07:35',Time.new(2012, 5, 28)).first,
+ Time.new(2012, 5, 28)
+ )
+ )
+
+ end
+
+ end
metadata ADDED
@@ -0,0 +1,49 @@
+ --- !ruby/object:Gem::Specification
+ name: fk_str
+ version: !ruby/object:Gem::Version
+ version: 0.0.1
+ prerelease:
+ platform: ruby
+ authors:
+ - Guilherme Baptista
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2012-03-13 00:00:00.000000000 Z
+ dependencies: []
+ description: String manipulation.
+ email: guilhermebaptistasilva@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - LICENSE
+ - Rakefile
+ - lib/fk_str.rb
+ - lib/fk_str/dictionary.rb
+ - test/test_fk_str.rb
+ homepage: https://github.com/gbaptista/fk_str
+ licenses: []
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.24
+ signing_key:
+ specification_version: 3
+ summary: FkStr
+ test_files: []