normatron 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +35 -48
- data/Rakefile +6 -7
- data/lib/generators/normatron/install_generator.rb +23 -0
- data/lib/generators/normatron/templates/normatron.rb +4 -0
- data/lib/normatron.rb +22 -8
- data/lib/normatron/configuration.rb +26 -22
- data/lib/normatron/extensions.rb +8 -0
- data/lib/normatron/extensions/active_record.rb +20 -15
- data/lib/normatron/filters.rb +26 -379
- data/lib/normatron/filters/blank_filter.rb +29 -0
- data/lib/normatron/filters/camelize_filter.rb +50 -0
- data/lib/normatron/filters/capitalize_filter.rb +29 -0
- data/lib/normatron/filters/chomp_filter.rb +34 -0
- data/lib/normatron/filters/dasherize_filter.rb +25 -0
- data/lib/normatron/filters/downcase_filter.rb +29 -0
- data/lib/normatron/filters/dump_filter.rb +27 -0
- data/lib/normatron/filters/helpers.rb +44 -0
- data/lib/normatron/filters/keep_filter.rb +100 -0
- data/lib/normatron/filters/remove_filter.rb +37 -0
- data/lib/normatron/filters/squeeze_filter.rb +30 -0
- data/lib/normatron/filters/squish_filter.rb +28 -0
- data/lib/normatron/filters/strip_filter.rb +33 -0
- data/lib/normatron/filters/swapcase_filter.rb +30 -0
- data/lib/normatron/filters/titleize_filter.rb +29 -0
- data/lib/normatron/filters/underscore_filter.rb +45 -0
- data/lib/normatron/filters/upcase_filter.rb +29 -0
- data/lib/normatron/version.rb +3 -0
- data/spec/normatron/configuration_spec.rb +60 -0
- data/spec/normatron/extensions/active_record_spec.rb +96 -0
- data/spec/normatron/filters/blank_filter_spec.rb +15 -0
- data/spec/normatron/filters/camelize_filter_spec.rb +42 -0
- data/spec/normatron/filters/capitalize_filter_spec.rb +14 -0
- data/spec/normatron/filters/chomp_filter_spec.rb +15 -0
- data/spec/normatron/filters/dasherize_filter_spec.rb +9 -0
- data/spec/normatron/filters/downcase_filter_spec.rb +10 -0
- data/spec/normatron/filters/dump_filter_spec.rb +10 -0
- data/spec/normatron/filters/keep_filter_spec.rb +86 -0
- data/spec/normatron/filters/remove_filter_spec.rb +86 -0
- data/spec/normatron/filters/squeeze_filter_spec.rb +10 -0
- data/spec/normatron/filters/squish_filter_spec.rb +12 -0
- data/spec/normatron/filters/strip_filter_spec.rb +12 -0
- data/spec/normatron/filters/swapcase_filter_spec.rb +12 -0
- data/spec/normatron/filters/titleize_filter_spec.rb +12 -0
- data/spec/normatron/filters/underscore_filter_spec.rb +26 -0
- data/spec/normatron/filters/upcase_filter_spec.rb +10 -0
- data/spec/normatron_spec.rb +28 -2
- data/spec/spec_helper.rb +37 -4
- data/spec/support/my_filters.rb +7 -0
- data/spec/support/user_model.rb +14 -0
- metadata +64 -13
- data/spec/configuration_spec.rb +0 -53
- data/spec/extensions/active_record_spec.rb +0 -114
- data/spec/filters_spec.rb +0 -442
- data/spec/support/model_model.rb +0 -3
- data/spec/support/schema.rb +0 -7
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
  module Filters
    module BlankFilter

      ##
      # Turns a blank string into +nil+ and hands every other value back untouched.
      # Blankness is decided by <tt>String#blank?</tt>; non-String objects are never inspected.
      #
      # @example
      #   BlankFilter.evaluate("")            #=> nil
      #   BlankFilter.evaluate(" ")           #=> nil
      #   BlankFilter.evaluate(" \n ")        #=> nil
      #   BlankFilter.evaluate("1")           #=> "1"
      #   BlankFilter.evaluate("It's blank?") #=> "It's blank?"
      #
      # @example Using as ActiveRecord::Base normalizer
      #   normalize :attribute_a, :with => :blank
      #   normalize :attribute_b, :with => [:custom_filter, :blank]
      #
      # @param input [String] A character sequence
      # @return [String, nil] The object itself or nil
      # @see http://api.rubyonrails.org/classes/String.html#method-i-blank-3F String#blank?
      def self.evaluate(input)
        # Anything that is not a String passes straight through.
        return input unless input.kind_of?(String)

        input.blank? ? nil : input
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
  module Filters
    module CamelizeFilter
      extend Helpers

      ##
      # Converts strings to UpperCamelCase by default and to lowerCamelCase if the <tt>:lower</tt> argument is given.
      # <tt>camelize</tt> will also convert '/' to '::' which is useful for converting paths to namespaces.
      # As a rule of thumb you can think of camelize as the inverse of underscore, though there are cases where that does not hold:
      #   "SSLError".underscore.camelize # => "SslError"
      # This filter has a similar behavior to
      # ActiveSupport::Inflector#camelize[http://api.rubyonrails.org/classes/ActiveSupport/Inflector.html#method-i-camelize], but with following differences:
      # * Uses UTF-8 charset
      # * Affects accented characters
      #
      # Any +first_letter_case+ other than <tt>:upper</tt> is handled as <tt>:lower</tt>.
      # A String is always returned for String input, including inputs that contain
      # no underscore or slash.
      #
      # @example
      #   CamelizeFilter.evaluate("active_record/errors")         #=> "ActiveRecord::Errors"
      #   CamelizeFilter.evaluate("active_record/errors", :upper) #=> "ActiveRecord::Errors"
      #   CamelizeFilter.evaluate("active_record/errors", :lower) #=> "activeRecord::Errors"
      #
      # @example Using as ActiveRecord::Base normalizer
      #   normalize :attribute_a, :with => :camelize
      #   normalize :attribute_b, :with => [:custom_filter, :camelize]
      #   normalize :attribute_c, :with => [[:camelize, :lower]]
      #   normalize :attribute_d, :with => [{:camelize => :lower}]
      #   normalize :attribute_e, :with => [:custom_filter, [:camelize, :lower]]
      #   normalize :attribute_f, :with => [:custom_filter, {:camelize => :lower}]
      #
      # @param [String] input A character sequence
      # @param [Symbol] first_letter_case <tt>:lower</tt> for lowerCamelCase or <tt>:upper</tt> for UpperCamelCase
      # @return [String] The camelized character sequence or the object itself
      # @see http://api.rubyonrails.org/classes/ActiveSupport/Inflector.html#method-i-camelize ActiveSupport::Inflector#camelize
      # @see UnderscoreFilter Normatron::Filters::UnderscoreFilter
      # @todo Performance tests
      # @todo Exception class
      def self.evaluate(input, first_letter_case = :upper)
        return input unless input.kind_of?(String)

        string =
          if first_letter_case == :upper
            # Leading word: use the registered acronym if there is one, otherwise capitalize it.
            input.sub(/^[\p{L}\d]*/u) { acronyms[$&] || mb_send(:capitalize, $&) }
          else
            # Every non-:upper value lands here; the leading acronym or word is downcased.
            input.sub(/^(?:#{acronym_regex}(?=\b|[\p{L}_])|\p{Word}*_)/u) { mb_send(:downcase, $&) }
          end

        # Non-destructive gsub on purpose: gsub! returns nil when nothing was replaced,
        # which made the chained call return nil (no '/') or raise on nil (no '_' and no '/').
        string.gsub(/(?:_|(\/))([\p{L}\d]*)/iu) { "#{$1}#{acronyms[$2] || mb_send(:capitalize, $2)}" }.gsub('/', '::')
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
  module Filters
    module CapitalizeFilter
      extend Helpers

      ##
      # Uppercases the first character and lowercases all the remaining ones.
      # The work is delegated to the +capitalize+ method of the string's multibyte proxy
      # (through Helpers#mb_send); objects that are not Strings are returned as received.
      #
      # @example
      #   CapitalizeFilter.evaluate("KEEP IT SIMPLE")  #=> "Keep it simple"
      #   CapitalizeFilter.evaluate("keep it simple")  #=> "Keep it simple"
      #   CapitalizeFilter.evaluate(" KEEP IT SIMPLE") #=> " keep it simple"
      #
      # @example Using as ActiveRecord::Base normalizer
      #   normalize :attribute_a, :with => :capitalize
      #   normalize :attribute_b, :with => [:custom_filter, :capitalize]
      #
      # @param [String] input A character sequence
      # @return [String] The capitalized character sequence or the object itself
      # @see http://www.ruby-doc.org/core-1.9.3/String.html#method-i-capitalize String#capitalize
      # @see TitleizeFilter Normatron::Filters::TitleizeFilter
      def self.evaluate(input)
        return input unless input.kind_of?(String)

        mb_send(:capitalize, input)
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Normatron
  module Filters
    module ChompFilter

      ##
      # Strips the given record separator from the end of the string, when it is there.
      # While <tt>$/</tt> still holds the default Ruby record separator, carriage returns
      # are removed as well (that is, a trailing <tt>\n</tt>, <tt>\r</tt> or <tt>\r\n</tt>).
      #
      # @example
      #   ChompFilter.evaluate("Bon Scott\n")         #=> "Bon Scott"
      #   ChompFilter.evaluate("Bon Scott\r")         #=> "Bon Scott"
      #   ChompFilter.evaluate("Bon Scott\r\n")       #=> "Bon Scott"
      #   ChompFilter.evaluate("Bon Scott\n\r")       #=> "Bon Scott\n"
      #   ChompFilter.evaluate("Bon Scott", " Scott") #=> "Bon"
      #
      # @example Using as ActiveRecord::Base normalizer
      #   normalize :attribute_a, :with => :chomp
      #   normalize :attribute_b, :with => [:custom_filter, :chomp]
      #   normalize :attribute_c, :with => [[:chomp, "x"]]
      #   normalize :attribute_d, :with => [{:chomp => "y"}]
      #   normalize :attribute_e, :with => [:custom_filter, [:chomp, "z"]]
      #   normalize :attribute_f, :with => [:custom_filter, {:chomp => "\f"}]
      #
      # @param [String] input A character sequence
      # @param [String] separator A character sequence
      # @return [String] The chomped character sequence or the object itself
      # @see http://www.ruby-doc.org/core-1.9.3/String.html#method-i-chomp String#chomp
      def self.evaluate(input, separator=$/)
        # Non-String values are not touched.
        return input unless input.kind_of?(String)

        input.chomp(separator)
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
  module Filters
    module DasherizeFilter

      ##
      # Swaps every underscore for a dash by calling <tt>String#dasherize</tt>.
      # Objects that are not Strings are returned as received.
      #
      # @example
      #   DasherizeFilter.evaluate("monty_python") #=> "monty-python"
      #
      # @example Using as ActiveRecord::Base normalizer
      #   normalize :attribute_a, :with => :dasherize
      #   normalize :attribute_b, :with => [:custom_filter, :dasherize]
      #
      # @param [String] input A character sequence
      # @return [String] The dasherized character sequence or the object itself
      # @see http://api.rubyonrails.org/classes/String.html#method-i-dasherize String#dasherize
      def self.evaluate(input)
        return input unless input.kind_of?(String)

        input.dasherize
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
  module Filters
    module DowncaseFilter
      extend Helpers

      ##
      # Puts every character in lowercase.
      # The work is delegated to the +downcase+ method of the string's multibyte proxy
      # (through Helpers#mb_send); objects that are not Strings are returned as received.
      #
      # @example
      #   DowncaseFilter.evaluate("NOTHING ELSE MATTERS") #=> "nothing else matters"
      #
      # @example Using as ActiveRecord::Base normalizer
      #   normalize :attribute_a, :with => :downcase
      #   normalize :attribute_b, :with => [:custom_filter, :downcase]
      #
      # @param [String] input A character sequence
      # @return [String] The lowercased character sequence or the object itself
      # @see http://api.rubyonrails.org/classes/ActiveSupport/Multibyte/Chars.html#method-i-downcase ActiveSupport::Multibyte::Chars#downcase
      # @see SwapcaseFilter Normatron::Filters::SwapcaseFilter
      # @see TitleizeFilter Normatron::Filters::TitleizeFilter
      # @see UpcaseFilter Normatron::Filters::UpcaseFilter
      def self.evaluate(input)
        return input unless input.kind_of?(String)

        mb_send(:downcase, input)
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Normatron
  module Filters
    module DumpFilter

      ##
      # Builds a literal representation of the string (via <tt>String#dump</tt>): nonprinting
      # characters are written in <tt>\\n</tt> notation and special characters are escaped.
      # Objects that are not Strings are returned as received.
      #
      # @example
      #   DumpFilter.evaluate("I'm not\na \"clubber\"...") #=> "\"I'm not\\na \\\"clubber\\\"...\""
      #   # which is the same string as '"I\'m not\na \"clubber\"..."'
      #   DumpFilter.evaluate('I\'m not\na "clubber"...')  #=> "\"I'm not\\\\na \\\"clubber\\\"...\""
      #   # which is the same string as '"I\'m not\\\na \"clubber\"..."'
      #
      # @example Using as ActiveRecord::Base normalizer
      #   normalize :attribute_a, :with => :dump
      #   normalize :attribute_b, :with => [:custom_filter, :dump]
      #
      # @param [String] input A character sequence
      # @return [String] The dumped character sequence or the object itself
      # @see http://www.ruby-doc.org/core-1.9.3/String.html#method-i-dump String#dump
      def self.evaluate(input)
        return input unless input.kind_of?(String)

        input.dump
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'active_support/core_ext/string'
|
4
|
+
require 'active_support/inflector/inflections'
|
5
|
+
require 'active_support/multibyte/chars'
|
6
|
+
|
7
|
+
module Normatron
  module Filters
    # Shared utilities mixed into the filter modules with +extend+.
    module Helpers
      # Deletes characters from +value+ according to a set of <tt>\p{}</tt> property names.
      #
      # With +action+ equal to <tt>:keep</tt> the character class is negated, so every
      # character NOT matching one of the properties is deleted; with any other +action+
      # the characters that DO match are deleted.
      #
      # @param [String] value The string to clean
      # @param [Symbol] action <tt>:keep</tt> to negate the character class
      # @param [Array] properties Property names; nested arrays are flattened, nils and duplicates dropped
      # @return [String] A copy of +value+ without the targeted characters
      def evaluate_regexp(value, action, *properties)
        names = properties.flatten.compact.uniq
        classes = names.map { |name| "\\p{#{name}}" }.join
        negation = action == :keep ? '^' : ''

        value.gsub(/[#{negation}#{classes}]/u, "")
      end

      # Deletes whitespace (<tt>\s</tt>) from one or both ends of +value+.
      #
      # @param [String] value The string to strip
      # @param [Symbol] edges <tt>:L</tt> (start), <tt>:R</tt> (end) or <tt>:LR</tt> (both);
      #   any other value leaves the content unchanged
      # @return [String] The stripped copy of +value+
      def evaluate_strip(value, edges)
        patterns = []
        patterns << '\A\s*' if [:L, :LR].include?(edges)
        patterns << '\s*\z' if [:R, :LR].include?(edges)

        # With no pattern selected the regexp is empty and gsub replaces nothing visible.
        value.gsub(/#{patterns.join('|')}/, '')
      end

      # @return The acronyms registered in the ActiveSupport inflections
      def acronyms
        inflections.acronyms
      end

      # @return The acronym regular expression exposed by the ActiveSupport inflections
      def acronym_regex
        inflections.acronym_regex
      end

      # @return The ActiveSupport::Inflector::Inflections instance
      def inflections
        ActiveSupport::Inflector::Inflections.instance
      end

      # Calls +method+ on the multibyte proxy of +value+ and converts the result to a String.
      #
      # @param [Symbol] method Name of the method to invoke on <tt>value.mb_chars</tt>
      # @param [String] value The string to transform
      # @return [String] The transformed string
      def mb_send(method, value)
        multibyte = value.mb_chars
        multibyte.send(method).to_s
      end
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
  module Filters
    module KeepFilter
      extend Helpers

      ##
      # Removes the characters that don't match the given properties.
      # The character properties follow the rule of the \\p{} construct described in the Regexp class.
      # The \\p{} construct matches characters with the named property, much like POSIX bracket classes.
      #
      # To pass named properties to this filter, use them as Symbols:
      # * <tt>:Alnum</tt> - Alphabetic and numeric character
      # * <tt>:Alpha</tt> - Alphabetic character
      # * <tt>:Blank</tt> - Space or tab
      # * <tt>:Cntrl</tt> - Control character
      # * <tt>:Digit</tt> - Digit
      # * <tt>:Graph</tt> - Non-blank character (excludes spaces, control characters, and similar)
      # * <tt>:Lower</tt> - Lowercase alphabetical character
      # * <tt>:Print</tt> - Like :Graph, but includes the space character
      # * <tt>:Punct</tt> - Punctuation character
      # * <tt>:Space</tt> - Whitespace character ([:blank:], newline, carriage return, etc.)
      # * <tt>:Upper</tt> - Uppercase alphabetical
      # * <tt>:XDigit</tt> - Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F)
      # * <tt>:Word</tt> - A member of one of the following Unicode general category Letter, Mark, Number, Connector_Punctuation
      # * <tt>:ASCII</tt> - A character in the ASCII character set
      # * <tt>:Any</tt> - Any Unicode character (including unassigned characters)
      # * <tt>:Assigned</tt> - An assigned character
      #
      # A Unicode character's General Category value can also be matched with :Ab where Ab is the category's abbreviation as described below:
      # * <tt>:L</tt> - 'Letter'
      # * <tt>:Ll</tt> - 'Letter: Lowercase'
      # * <tt>:Lm</tt> - 'Letter: Modifier'
      # * <tt>:Lo</tt> - 'Letter: Other'
      # * <tt>:Lt</tt> - 'Letter: Titlecase'
      # * <tt>:Lu</tt> - 'Letter: Uppercase'
      # * <tt>:M</tt> - 'Mark'
      # * <tt>:Mn</tt> - 'Mark: Nonspacing'
      # * <tt>:Mc</tt> - 'Mark: Spacing Combining'
      # * <tt>:Me</tt> - 'Mark: Enclosing'
      # * <tt>:N</tt> - 'Number'
      # * <tt>:Nd</tt> - 'Number: Decimal Digit'
      # * <tt>:Nl</tt> - 'Number: Letter'
      # * <tt>:No</tt> - 'Number: Other'
      # * <tt>:P</tt> - 'Punctuation'
      # * <tt>:Pc</tt> - 'Punctuation: Connector'
      # * <tt>:Pd</tt> - 'Punctuation: Dash'
      # * <tt>:Ps</tt> - 'Punctuation: Open'
      # * <tt>:Pe</tt> - 'Punctuation: Close'
      # * <tt>:Pi</tt> - 'Punctuation: Initial Quote'
      # * <tt>:Pf</tt> - 'Punctuation: Final Quote'
      # * <tt>:Po</tt> - 'Punctuation: Other'
      # * <tt>:S</tt> - 'Symbol'
      # * <tt>:Sm</tt> - 'Symbol: Math'
      # * <tt>:Sc</tt> - 'Symbol: Currency'
      # * <tt>:Sk</tt> - 'Symbol: Modifier'
      # * <tt>:So</tt> - 'Symbol: Other'
      # * <tt>:Z</tt> - 'Separator'
      # * <tt>:Zs</tt> - 'Separator: Space'
      # * <tt>:Zl</tt> - 'Separator: Line'
      # * <tt>:Zp</tt> - 'Separator: Paragraph'
      # * <tt>:C</tt> - 'Other'
      # * <tt>:Cc</tt> - 'Other: Control'
      # * <tt>:Cf</tt> - 'Other: Format'
      # * <tt>:Cn</tt> - 'Other: Not Assigned'
      # * <tt>:Co</tt> - 'Other: Private Use'
      # * <tt>:Cs</tt> - 'Other: Surrogate'
      #
      # Lastly, this method matches a character's Unicode script. The following scripts are supported:
      #
      # Arabic, Armenian, Balinese, Bengali, Bopomofo, Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot, Cyrillic, Deseret, Devanagari, Ethiopic, Georgian, Glagolitic, Gothic, Greek, Gujarati, Gurmukhi, Han, Hangul, Hanunoo, Hebrew, Hiragana, Inherited, Kannada, Katakana, Kayah_Li, Kharoshthi, Khmer, Lao, Latin, Lepcha, Limbu, Linear_B, Lycian, Lydian, Malayalam, Mongolian, Myanmar, New_Tai_Lue, Nko, Ogham, Ol_Chiki, Old_Italic, Old_Persian, Oriya, Osmanya, Phags_Pa, Phoenician, Rejang, Runic, Saurashtra, Shavian, Sinhala, Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tamil, Telugu, Thaana, Thai, Tibetan, Tifinagh, Ugaritic, Vai, and Yi.
      #
      # @example
      #   KeepFilter.evaluate("Doom 3", :L)      #=> "Doom"  keep only letters
      #   KeepFilter.evaluate("Doom 3", :N)      #=> "3"     keep only numbers
      #   KeepFilter.evaluate("Doom 3", :L, :N)  #=> "Doom3" keep only letters and numbers
      #   KeepFilter.evaluate("Doom 3", :Lu, :N) #=> "D3"    keep only uppercased letters or numbers
      #   KeepFilter.evaluate("Doom ˩", :Latin)  #=> "Doom"  keep only latin characters
      #
      # @example Using as ActiveRecord::Base normalizer
      #   normalize :attribute_a, :with => [[:keep, :Lu]]
      #   normalize :attribute_b, :with => [{:keep =>[:Lu]}]
      #   normalize :attribute_c, :with => [:custom_filter, [:keep, :Ll, :Space]]
      #   normalize :attribute_d, :with => [:custom_filter, {:keep => [:Ll, :Space]}]
      #
      # @param [String] input A character sequence
      # @param [[Symbol]*] properties Array of Symbols equivalent to Regexp property for \\p{} construct.
      # @return [String] The clean character sequence or the object itself
      # @see http://www.ruby-doc.org/core-1.9.3/Regexp.html Regexp
      # @see RemoveFilter Normatron::Filters::RemoveFilter
      # @todo Raise exception for empty properties
      def self.evaluate(input, *properties)
        # Non-String values are handed back untouched.
        return input unless input.kind_of?(String)

        evaluate_regexp(input, :keep, properties)
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
  module Filters
    module RemoveFilter
      extend Helpers

      ##
      # Removes the characters that match the given properties.
      #
      # For the list of accepted properties see the Normatron::Filters::KeepFilter documentation.
      #
      # @example
      #   RemoveFilter.evaluate("Quake 3", :L)      #=> " 3"     remove only letters
      #   RemoveFilter.evaluate("Quake 3", :N)      #=> "Quake " remove only numbers
      #   RemoveFilter.evaluate("Quake 3", :L, :N)  #=> " "      remove only letters or numbers
      #   RemoveFilter.evaluate("Quake 3", :Lu, :N) #=> "uake "  remove only uppercased letters or numbers
      #   RemoveFilter.evaluate("Quake ˩", :Latin)  #=> " ˩"     remove only latin characters
      #
      # @example Using as ActiveRecord::Base normalizer
      #   normalize :attribute_a, :with => [[:remove, :Lu]]
      #   normalize :attribute_b, :with => [{:remove =>[:Lu]}]
      #   normalize :attribute_c, :with => [:custom_filter, [:remove, :Ll, :Space]]
      #   normalize :attribute_d, :with => [:custom_filter, {:remove => [:Ll, :Space]}]
      #
      # @param [String] input A character sequence
      # @param [[Symbol]*] properties Array of Symbols equivalent to Regexp property for \\p{} construct.
      # @return [String] The clean character sequence or the object itself
      # @see http://www.ruby-doc.org/core-1.9.3/Regexp.html Regexp
      # @see KeepFilter Normatron::Filters::KeepFilter
      # @todo Raise exception for empty properties
      def self.evaluate(input, *properties)
        # Non-String values are handed back untouched.
        return input unless input.kind_of?(String)

        evaluate_regexp(input, :remove, properties)
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Normatron
  module Filters
    module SqueezeFilter

      ##
      # Collapses runs of the same character into a single occurrence.
      # Without targets, every run of identical characters is collapsed.
      # When targets are given, only the LAST one is passed on to <tt>String#squeeze</tt>.
      # Objects that are not Strings are returned as received.
      #
      # @example
      #   SqueezeFilter.evaluate("yellow moon")                #=> "yelow mon"
      #   SqueezeFilter.evaluate("  now   is  the", " ")       #=> " now is the"
      #   SqueezeFilter.evaluate("putters shoot balls", "m-z") #=> "puters shot balls"
      #
      # @example Using as ActiveRecord::Base normalizer
      #   normalize :attribute_a, :with => [:custom_filter, :squeeze]
      #   normalize :attribute_b, :with => [:custom_filter, [:squeeze, "a-f"]]
      #   normalize :attribute_c, :with => [:custom_filter, {:squeeze => ["a-f"]}]
      #
      # @param [String] input A character sequence
      # @param [[String]*] targets Characters to be affected
      # @return [String] The clean character sequence or the object itself
      # @see http://www.ruby-doc.org/core-1.9.3/String.html#method-i-squeeze String#squeeze
      # @see SquishFilter Normatron::Filters::SquishFilter
      def self.evaluate(input, *targets)
        return input unless input.kind_of?(String)
        # No truthy target at all: squeeze every repeated character.
        return input.squeeze if targets.none?

        input.squeeze(targets.last)
      end
    end
  end
end
|