normatron 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +35 -48
- data/Rakefile +6 -7
- data/lib/generators/normatron/install_generator.rb +23 -0
- data/lib/generators/normatron/templates/normatron.rb +4 -0
- data/lib/normatron.rb +22 -8
- data/lib/normatron/configuration.rb +26 -22
- data/lib/normatron/extensions.rb +8 -0
- data/lib/normatron/extensions/active_record.rb +20 -15
- data/lib/normatron/filters.rb +26 -379
- data/lib/normatron/filters/blank_filter.rb +29 -0
- data/lib/normatron/filters/camelize_filter.rb +50 -0
- data/lib/normatron/filters/capitalize_filter.rb +29 -0
- data/lib/normatron/filters/chomp_filter.rb +34 -0
- data/lib/normatron/filters/dasherize_filter.rb +25 -0
- data/lib/normatron/filters/downcase_filter.rb +29 -0
- data/lib/normatron/filters/dump_filter.rb +27 -0
- data/lib/normatron/filters/helpers.rb +44 -0
- data/lib/normatron/filters/keep_filter.rb +100 -0
- data/lib/normatron/filters/remove_filter.rb +37 -0
- data/lib/normatron/filters/squeeze_filter.rb +30 -0
- data/lib/normatron/filters/squish_filter.rb +28 -0
- data/lib/normatron/filters/strip_filter.rb +33 -0
- data/lib/normatron/filters/swapcase_filter.rb +30 -0
- data/lib/normatron/filters/titleize_filter.rb +29 -0
- data/lib/normatron/filters/underscore_filter.rb +45 -0
- data/lib/normatron/filters/upcase_filter.rb +29 -0
- data/lib/normatron/version.rb +3 -0
- data/spec/normatron/configuration_spec.rb +60 -0
- data/spec/normatron/extensions/active_record_spec.rb +96 -0
- data/spec/normatron/filters/blank_filter_spec.rb +15 -0
- data/spec/normatron/filters/camelize_filter_spec.rb +42 -0
- data/spec/normatron/filters/capitalize_filter_spec.rb +14 -0
- data/spec/normatron/filters/chomp_filter_spec.rb +15 -0
- data/spec/normatron/filters/dasherize_filter_spec.rb +9 -0
- data/spec/normatron/filters/downcase_filter_spec.rb +10 -0
- data/spec/normatron/filters/dump_filter_spec.rb +10 -0
- data/spec/normatron/filters/keep_filter_spec.rb +86 -0
- data/spec/normatron/filters/remove_filter_spec.rb +86 -0
- data/spec/normatron/filters/squeeze_filter_spec.rb +10 -0
- data/spec/normatron/filters/squish_filter_spec.rb +12 -0
- data/spec/normatron/filters/strip_filter_spec.rb +12 -0
- data/spec/normatron/filters/swapcase_filter_spec.rb +12 -0
- data/spec/normatron/filters/titleize_filter_spec.rb +12 -0
- data/spec/normatron/filters/underscore_filter_spec.rb +26 -0
- data/spec/normatron/filters/upcase_filter_spec.rb +10 -0
- data/spec/normatron_spec.rb +28 -2
- data/spec/spec_helper.rb +37 -4
- data/spec/support/my_filters.rb +7 -0
- data/spec/support/user_model.rb +14 -0
- metadata +64 -13
- data/spec/configuration_spec.rb +0 -53
- data/spec/extensions/active_record_spec.rb +0 -114
- data/spec/filters_spec.rb +0 -442
- data/spec/support/model_model.rb +0 -3
- data/spec/support/schema.rb +0 -7
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
|
4
|
+
module Filters
|
5
|
+
module BlankFilter
|
6
|
+
|
7
|
+
##
|
8
|
+
# Returns nil for a blank string or the string itself otherwise.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# BlankFilter.evaluate("") #=> nil
|
12
|
+
# BlankFilter.evaluate(" ") #=> nil
|
13
|
+
# BlankFilter.evaluate(" \n ") #=> nil
|
14
|
+
# BlankFilter.evaluate("1") #=> "1"
|
15
|
+
# BlankFilter.evaluate("It's blank?") #=> "It's blank?"
|
16
|
+
#
|
17
|
+
# @example Using as ActiveRecord::Base normalizer
|
18
|
+
# normalize :attribute_a, :with => :blank
|
19
|
+
# normalize :attribute_b, :with => [:custom_filter, :blank]
|
20
|
+
#
|
21
|
+
# @param input [String] A character sequence
|
22
|
+
# @return [String, nil] The object itself or nil
|
23
|
+
# @see http://api.rubyonrails.org/classes/String.html#method-i-blank-3F String#blank?
|
24
|
+
def self.evaluate(input)
  # Only Strings are inspected; every other object is handed back as is.
  return input unless input.kind_of?(String)

  # A blank String collapses to nil, anything else is returned untouched.
  return nil if input.blank?
  input
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
|
4
|
+
module Filters
|
5
|
+
module CamelizeFilter
|
6
|
+
extend Helpers
|
7
|
+
|
8
|
+
##
|
9
|
+
# Converts strings to UpperCamelCase by default and to lowerCamelCase if the <tt>:lower</tt> argument is given.
|
10
|
+
# <tt>camelize</tt> will also convert '/' to '::' which is useful for converting paths to namespaces.
|
11
|
+
# As a rule of thumb you can think of camelize as the inverse of underscore, though there are cases where that does not hold:
|
12
|
+
# "SSLError".underscore.camelize # => "SslError"
|
13
|
+
# This filter has a similar behavior to
|
14
|
+
# ActiveSupport::Inflector#camelize[http://api.rubyonrails.org/classes/ActiveSupport/Inflector.html#method-i-camelize], but with following differences:
|
15
|
+
# * Uses UTF-8 charset
|
16
|
+
# * Affects accented characters
|
17
|
+
#
|
18
|
+
# @example
|
19
|
+
# CamelizeFilter.evaluate("active_record/errors") #=> "ActiveRecord::Errors"
|
20
|
+
# CamelizeFilter.evaluate("active_record/errors", :upper) #=> "ActiveRecord::Errors"
|
21
|
+
# CamelizeFilter.evaluate("active_record/errors", :lower) #=> "activeRecord::Errors"
|
22
|
+
#
|
23
|
+
# @example Using as ActiveRecord::Base normalizer
|
24
|
+
# normalize :attribute_a, :with => :camelize
|
25
|
+
# normalize :attribute_b, :with => [:custom_filter, :camelize]
|
26
|
+
# normalize :attribute_c, :with => [[:camelize, :lower]]
|
27
|
+
# normalize :attribute_d, :with => [{:camelize => :lower}]
|
28
|
+
# normalize :attribute_e, :with => [:custom_filter, [:camelize, :lower]]
|
29
|
+
# normalize :attribute_f, :with => [:custom_filter, {:camelize => :lower}]
|
30
|
+
#
|
31
|
+
# @param [String] input A character sequence
|
32
|
+
# @param [Symbol] first_letter_case <tt>:lower</tt> for lowerCamelCase or <tt>:upper</tt> for UpperCamelCase
|
33
|
+
# @return [String] The camelized character sequence or the object itself
|
34
|
+
# @see http://api.rubyonrails.org/classes/ActiveSupport/Inflector.html#method-i-camelize ActiveSupport::Inflector#camelize
|
35
|
+
# @see UnderscoreFilter Normatron::Filters::UnderscoreFilter
|
36
|
+
# @todo Performance tests
|
37
|
+
# @todo Exception class
|
38
|
+
def self.evaluate(input, first_letter_case = :upper)
  # Anything that is not a String is handed back untouched.
  return input unless input.kind_of?(String)

  string =
    if first_letter_case == :upper
      # Capitalize the leading run of letters/digits, preferring a registered acronym.
      input.sub(/^[\p{L}\d]*/u) { acronyms[$&] || mb_send(:capitalize, $&) }
    else
      # Every value other than :upper is handled as :lower. The original wrote
      # `else first_letter_case == :lower`, where the comparison was evaluated
      # and discarded, so this branch always ran for non-:upper values.
      input.sub(/^(?:#{acronym_regex}(?=\b|[\p{L}_])|\p{Word}*_)/u) { mb_send(:downcase, $&) }
    end

  # Non-bang gsub on purpose: gsub! returns nil when nothing is replaced, so
  # chaining two gsub! calls returned nil (no '/') or raised NoMethodError
  # (no '_' and no '/') for perfectly valid inputs.
  string = string.gsub(/(?:_|(\/))([\p{L}\d]*)/iu) { "#{$1}#{acronyms[$2] || mb_send(:capitalize, $2)}" }

  # Path separators become namespace separators.
  string.gsub('/', '::')
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
|
4
|
+
module Filters
|
5
|
+
module CapitalizeFilter
|
6
|
+
extend Helpers
|
7
|
+
|
8
|
+
##
|
9
|
+
# Lowercases every character and then makes the first one uppercase.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# CapitalizeFilter.evaluate("KEEP IT SIMPLE") #=> "Keep it simple"
|
13
|
+
# CapitalizeFilter.evaluate("keep it simple") #=> "Keep it simple"
|
14
|
+
# CapitalizeFilter.evaluate(" KEEP IT SIMPLE") #=> " keep it simple"
|
15
|
+
#
|
16
|
+
# @example Using as ActiveRecord::Base normalizer
|
17
|
+
# normalize :attribute_a, :with => :capitalize
|
18
|
+
# normalize :attribute_b, :with => [:custom_filter, :capitalize]
|
19
|
+
#
|
20
|
+
# @param [String] input A character sequence
|
21
|
+
# @return [String] The capitalized character sequence or the object itself
|
22
|
+
# @see http://www.ruby-doc.org/core-1.9.3/String.html#method-i-capitalize String#capitalize
|
23
|
+
# @see TitleizeFilter Normatron::Filters::TitleizeFilter
|
24
|
+
def self.evaluate(input)
  # Non-String objects are returned unchanged.
  return input unless input.kind_of?(String)

  # Delegate to the multibyte-aware helper so accented letters are handled too.
  mb_send(:capitalize, input)
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Normatron
|
2
|
+
module Filters
|
3
|
+
module ChompFilter
|
4
|
+
|
5
|
+
##
|
6
|
+
# Remove the given record separator from the end of the string (If present).
|
7
|
+
# If <tt>$/</tt> has not been changed from the default Ruby record separator,
|
8
|
+
# then chomp also removes carriage return characters (that is it will remove <tt>\n</tt>, <tt>\r</tt>, and <tt>\r\n</tt>).
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# ChompFilter.evaluate("Bon Scott\n") #=> "Bon Scott"
|
12
|
+
# ChompFilter.evaluate("Bon Scott\r") #=> "Bon Scott"
|
13
|
+
# ChompFilter.evaluate("Bon Scott\r\n") #=> "Bon Scott"
|
14
|
+
# ChompFilter.evaluate("Bon Scott\n\r") #=> "Bon Scott\n"
|
15
|
+
# ChompFilter.evaluate("Bon Scott", " Scott") #=> "Bon"
|
16
|
+
#
|
17
|
+
# @example Using as ActiveRecord::Base normalizer
|
18
|
+
# normalize :attribute_a, :with => :chomp
|
19
|
+
# normalize :attribute_b, :with => [:custom_filter, :chomp]
|
20
|
+
# normalize :attribute_c, :with => [[:chomp, "x"]]
|
21
|
+
# normalize :attribute_d, :with => [{:chomp => "y"}]
|
22
|
+
# normalize :attribute_e, :with => [:custom_filter, [:chomp, "z"]]
|
23
|
+
# normalize :attribute_f, :with => [:custom_filter, {:chomp => "\f"}]
|
24
|
+
#
|
25
|
+
# @param [String] input A character sequence
|
26
|
+
# @param [String] separator A character sequence
|
27
|
+
# @return [String] The chomped character sequence or the object itself
|
28
|
+
# @see http://www.ruby-doc.org/core-1.9.3/String.html#method-i-chomp String#chomp
|
29
|
+
def self.evaluate(input, separator=$/)
  # Non-String objects are returned unchanged.
  return input unless input.kind_of?(String)

  # String#chomp strips +separator+ from the end only when it is present.
  input.chomp(separator)
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
|
4
|
+
module Filters
|
5
|
+
module DasherizeFilter
|
6
|
+
|
7
|
+
##
|
8
|
+
# Replaces all underscores with dashes.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# DasherizeFilter.evaluate("monty_python") #=> "monty-python"
|
12
|
+
#
|
13
|
+
# @example Using as ActiveRecord::Base normalizer
|
14
|
+
# normalize :attribute_a, :with => :dasherize
|
15
|
+
# normalize :attribute_b, :with => [:custom_filter, :dasherize]
|
16
|
+
#
|
17
|
+
# @param [String] input A character sequence
|
18
|
+
# @return [String] The dasherized character sequence or the object itself
|
19
|
+
# @see http://api.rubyonrails.org/classes/String.html#method-i-dasherize String#dasherize
|
20
|
+
def self.evaluate(input)
  # Non-String objects are returned unchanged.
  return input unless input.kind_of?(String)

  # String#dasherize is supplied by ActiveSupport's String core extensions.
  input.dasherize
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
|
4
|
+
module Filters
|
5
|
+
module DowncaseFilter
|
6
|
+
extend Helpers
|
7
|
+
|
8
|
+
##
|
9
|
+
# Lowercase all characters.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# DowncaseFilter.evaluate("NOTHING ELSE MATTERS") #=> "nothing else matters"
|
13
|
+
#
|
14
|
+
# @example Using as ActiveRecord::Base normalizer
|
15
|
+
# normalize :attribute_a, :with => :downcase
|
16
|
+
# normalize :attribute_b, :with => [:custom_filter, :downcase]
|
17
|
+
#
|
18
|
+
# @param [String] input A character sequence
|
19
|
+
# @return [String] The lowercased character sequence or the object itself
|
20
|
+
# @see http://api.rubyonrails.org/classes/ActiveSupport/Multibyte/Chars.html#method-i-downcase ActiveSupport::Multibyte::Chars#downcase
|
21
|
+
# @see SwapcaseFilter Normatron::Filters::SwapcaseFilter
|
22
|
+
# @see TitleizeFilter Normatron::Filters::TitleizeFilter
|
23
|
+
# @see UpcaseFilter Normatron::Filters::UpcaseFilter
|
24
|
+
def self.evaluate(input)
  # Non-String objects are returned unchanged.
  return input unless input.kind_of?(String)

  # Delegate to the multibyte-aware helper so accented letters are handled too.
  mb_send(:downcase, input)
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Normatron
|
2
|
+
module Filters
|
3
|
+
module DumpFilter
|
4
|
+
|
5
|
+
##
|
6
|
+
# Creates a literal string representation with all nonprinting characters
|
7
|
+
# replaced by <tt>\\n</tt> notation and all special characters escaped.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# DumpFilter.evaluate("I'm not\na \"clubber\"...") #=> "\"I'm not\\na \\\"clubber\\\"...\""
|
11
|
+
# DumpFilter.evaluate("I'm not\na \"clubber\"...") #== '"I\'m not\na \"clubber\"..."'
|
12
|
+
# DumpFilter.evaluate('I\'m not\na "clubber"...') #=> "\"I'm not\\\\na \\\"clubber\\\"...\""
|
13
|
+
# DumpFilter.evaluate('I\'m not\na "clubber"...') #== '"I\'m not\\\na \"clubber\"..."'
|
14
|
+
#
|
15
|
+
# @example Using as ActiveRecord::Base normalizer
|
16
|
+
# normalize :attribute_a, :with => :dump
|
17
|
+
# normalize :attribute_b, :with => [:custom_filter, :dump]
|
18
|
+
#
|
19
|
+
# @param [String] input A character sequence
|
20
|
+
# @return [String] The dumped character sequence or the object itself
|
21
|
+
# @see http://www.ruby-doc.org/core-1.9.3/String.html#method-i-dump String#dump
|
22
|
+
def self.evaluate(input)
  # Non-String objects are returned unchanged.
  return input unless input.kind_of?(String)

  # String#dump yields the quoted, escaped literal form of the string.
  input.dump
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'active_support/core_ext/string'
|
4
|
+
require 'active_support/inflector/inflections'
|
5
|
+
require 'active_support/multibyte/chars'
|
6
|
+
|
7
|
+
module Normatron
|
8
|
+
module Filters
|
9
|
+
module Helpers
|
10
|
+
##
# Deletes characters from +value+ according to a set of \\p{} character properties.
#
# @param [String] value A character sequence
# @param [Symbol] action <tt>:keep</tt> deletes every character that does NOT match
#   the properties; any other value deletes the characters that DO match them
# @param [[Symbol]*] properties Property names; nested arrays are flattened, nils and duplicates dropped
# @return [String] A copy of +value+ without the deleted characters
def evaluate_regexp(value, action, *properties)
  names = properties.flatten.compact.uniq

  # Each property becomes a \p{Name} construct inside a single character class.
  char_class = names.map { |name| "\\p{#{name}}" }.join

  # A leading '^' negates the class, so only non-matching characters are deleted.
  negation = action == :keep ? '^' : ''

  # NOTE(review): with no properties the character class is empty, which the
  # regexp engine is expected to reject - confirm, and consider raising an
  # explicit ArgumentError instead.
  value.gsub(/[#{negation}#{char_class}]/u, "")
end
|
16
|
+
|
17
|
+
##
# Removes whitespace runs from one or both ends of +value+.
#
# @param [String] value A character sequence
# @param [Symbol] edges <tt>:L</tt> for the start, <tt>:R</tt> for the end, <tt>:LR</tt> for both;
#   any other value leaves the string unchanged
# @return [String] A copy of +value+ without the selected surrounding whitespace
def evaluate_strip(value, edges)
  patterns = []
  patterns << '\A\s*' if [:L, :LR].include?(edges)
  patterns << '\s*\z' if [:R, :LR].include?(edges)

  # With no recognised edge the pattern is empty and gsub changes nothing.
  value.gsub(Regexp.new(patterns.join('|')), '')
end
|
26
|
+
|
27
|
+
##
# Shared ActiveSupport inflection rules used by the filters.
#
# @return [ActiveSupport::Inflector::Inflections] The object returned by <tt>Inflections.instance</tt>
def inflections
  ActiveSupport::Inflector::Inflections.instance
end

##
# Acronyms registered in the inflection rules.
#
# @return [Object] Whatever <tt>inflections.acronyms</tt> returns; callers index it with a word
def acronyms
  inflections.acronyms
end

##
# Pattern matching any registered acronym.
#
# @return [Object] Whatever <tt>inflections.acronym_regex</tt> returns; callers interpolate it into a Regexp
def acronym_regex
  inflections.acronym_regex
end
|
38
|
+
|
39
|
+
##
# Calls +method+ on the multibyte proxy of +value+ and converts the result back to a String.
#
# @param [Symbol] method Name of the method to invoke (e.g. <tt>:capitalize</tt>, <tt>:downcase</tt>)
# @param [String] value A character sequence
# @return [String] The result of the call, converted with +to_s+
def mb_send(method, value)
  chars = value.mb_chars
  chars.send(method).to_s
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
|
4
|
+
module Filters
|
5
|
+
module KeepFilter
|
6
|
+
extend Helpers
|
7
|
+
|
8
|
+
##
|
9
|
+
# Remove the characters that don't match the given properties.
|
10
|
+
# The character properties follow the rule of \\p{} construct described in Regexp class.
|
11
|
+
# The \\p{} construct matches characters with the named property, much like POSIX bracket classes.
|
12
|
+
#
|
13
|
+
# To pass named properties to this filter, use them as Symbols:
|
14
|
+
# * <tt>:Alnum</tt> - Alphabetic and numeric character
|
15
|
+
# * <tt>:Alpha</tt> - Alphabetic character
|
16
|
+
# * <tt>:Blank</tt> - Space or tab
|
17
|
+
# * <tt>:Cntrl</tt> - Control character
|
18
|
+
# * <tt>:Digit</tt> - Digit
|
19
|
+
# * <tt>:Graph</tt> - Non-blank character (excludes spaces, control characters, and similar)
|
20
|
+
# * <tt>:Lower</tt> - Lowercase alphabetical character
|
21
|
+
# * <tt>:Print</tt> - Like :Graph, but includes the space character
|
22
|
+
# * <tt>:Punct</tt> - Punctuation character
|
23
|
+
# * <tt>:Space</tt> - Whitespace character ([:blank:], newline, carriage return, etc.)
|
24
|
+
# * <tt>:Upper</tt> - Uppercase alphabetical
|
25
|
+
# * <tt>:XDigit</tt> - Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F)
|
26
|
+
# * <tt>:Word</tt> - A member of one of the following Unicode general category Letter, Mark, Number, Connector_Punctuation
|
27
|
+
# * <tt>:ASCII</tt> - A character in the ASCII character set
|
28
|
+
# * <tt>:Any</tt> - Any Unicode character (including unassigned characters)
|
29
|
+
# * <tt>:Assigned</tt> - An assigned character
|
30
|
+
#
|
31
|
+
# A Unicode character's General Category value can also be matched with :Ab where Ab is the category’s abbreviation as described below:
|
32
|
+
# * <tt>:L</tt> - 'Letter'
|
33
|
+
# * <tt>:Ll</tt> - 'Letter: Lowercase'
|
34
|
+
# * <tt>:Lm</tt> - 'Letter: Modifier'
|
35
|
+
# * <tt>:Lo</tt> - 'Letter: Other'
|
36
|
+
# * <tt>:Lt</tt> - 'Letter: Titlecase'
|
37
|
+
# * <tt>:Lu</tt> - 'Letter: Uppercase'
|
38
|
+
# * <tt>:Lo</tt> - 'Letter: Other'
|
39
|
+
# * <tt>:M</tt> - 'Mark'
|
40
|
+
# * <tt>:Mn</tt> - 'Mark: Nonspacing'
|
41
|
+
# * <tt>:Mc</tt> - 'Mark: Spacing Combining'
|
42
|
+
# * <tt>:Me</tt> - 'Mark: Enclosing'
|
43
|
+
# * <tt>:N</tt> - 'Number'
|
44
|
+
# * <tt>:Nd</tt> - 'Number: Decimal Digit'
|
45
|
+
# * <tt>:Nl</tt> - 'Number: Letter'
|
46
|
+
# * <tt>:No</tt> - 'Number: Other'
|
47
|
+
# * <tt>:P</tt> - 'Punctuation'
|
48
|
+
# * <tt>:Pc</tt> - 'Punctuation: Connector'
|
49
|
+
# * <tt>:Pd</tt> - 'Punctuation: Dash'
|
50
|
+
# * <tt>:Ps</tt> - 'Punctuation: Open'
|
51
|
+
# * <tt>:Pe</tt> - 'Punctuation: Close'
|
52
|
+
# * <tt>:Pi</tt> - 'Punctuation: Initial Quote'
|
53
|
+
# * <tt>:Pf</tt> - 'Punctuation: Final Quote'
|
54
|
+
# * <tt>:Po</tt> - 'Punctuation: Other'
|
55
|
+
# * <tt>:S</tt> - 'Symbol'
|
56
|
+
# * <tt>:Sm</tt> - 'Symbol: Math'
|
57
|
+
# * <tt>:Sc</tt> - 'Symbol: Currency'
|
58
|
+
# * <tt>:Sc</tt> - 'Symbol: Currency'
|
59
|
+
# * <tt>:Sk</tt> - 'Symbol: Modifier'
|
60
|
+
# * <tt>:So</tt> - 'Symbol: Other'
|
61
|
+
# * <tt>:Z</tt> - 'Separator'
|
62
|
+
# * <tt>:Zs</tt> - 'Separator: Space'
|
63
|
+
# * <tt>:Zl</tt> - 'Separator: Line'
|
64
|
+
# * <tt>:Zp</tt> - 'Separator: Paragraph'
|
65
|
+
# * <tt>:C</tt> - 'Other'
|
66
|
+
# * <tt>:Cc</tt> - 'Other: Control'
|
67
|
+
# * <tt>:Cf</tt> - 'Other: Format'
|
68
|
+
# * <tt>:Cn</tt> - 'Other: Not Assigned'
|
69
|
+
# * <tt>:Co</tt> - 'Other: Private Use'
|
70
|
+
# * <tt>:Cs</tt> - 'Other: Surrogate'
|
71
|
+
#
|
72
|
+
# Lastly, this method matches a character's Unicode script. The following scripts are supported:
|
73
|
+
#
|
74
|
+
# Arabic, Armenian, Balinese, Bengali, Bopomofo, Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot, Cyrillic, Deseret, Devanagari, Ethiopic, Georgian, Glagolitic, Gothic, Greek, Gujarati, Gurmukhi, Han, Hangul, Hanunoo, Hebrew, Hiragana, Inherited, Kannada, Katakana, Kayah_Li, Kharoshthi, Khmer, Lao, Latin, Lepcha, Limbu, Linear_B, Lycian, Lydian, Malayalam, Mongolian, Myanmar, New_Tai_Lue, Nko, Ogham, Ol_Chiki, Old_Italic, Old_Persian, Oriya, Osmanya, Phags_Pa, Phoenician, Rejang, Runic, Saurashtra, Shavian, Sinhala, Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tamil, Telugu, Thaana, Thai, Tibetan, Tifinagh, Ugaritic, Vai, and Yi.
|
75
|
+
#
|
76
|
+
# @example
|
77
|
+
# KeepFilter.evaluate("Doom 3", :L) #=> "Doom" keep only letters
|
78
|
+
# KeepFilter.evaluate("Doom 3", :N) #=> "3" keep only numbers
|
79
|
+
# KeepFilter.evaluate("Doom 3", :L, :N) #=> "Doom3" keep only letters and numbers
|
80
|
+
# KeepFilter.evaluate("Doom 3", :Lu, :N) #=> "D3" keep only uppercased letters or numbers
|
81
|
+
# KeepFilter.evaluate("Doom ˩", :Latin) #=> "Doom" keep only latin characters
|
82
|
+
#
|
83
|
+
# @example Using as ActiveRecord::Base normalizer
|
84
|
+
# normalize :attribute_a, :with => [[:keep, :Lu]]
|
85
|
+
# normalize :attribute_b, :with => [{:keep =>[:Lu]}]
|
86
|
+
# normalize :attribute_c, :with => [:custom_filter, [:keep, :Ll, :Space]]
|
87
|
+
# normalize :attribute_d, :with => [:custom_filter, {:keep => [:Ll, :Space]}]
|
88
|
+
#
|
89
|
+
# @param [String] input A character sequence
|
90
|
+
# @param [[Symbol]*] properties Array of Symbols equivalent to Regexp property for \\p{} construct.
|
91
|
+
# @return [String] The clean character sequence or the object itself
|
92
|
+
# @see http://www.ruby-doc.org/core-1.9.3/Regexp.html Regexp
|
93
|
+
# @see RemoveFilter Normatron::Filters::RemoveFilter
|
94
|
+
# @todo Raise exception for empty properties
|
95
|
+
def self.evaluate(input, *properties)
  # Non-String objects are returned unchanged.
  return input unless input.kind_of?(String)

  # :keep tells the shared helper to delete everything that does NOT match.
  evaluate_regexp(input, :keep, properties)
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'normatron/filters/helpers'
|
2
|
+
|
3
|
+
module Normatron
|
4
|
+
module Filters
|
5
|
+
module RemoveFilter
|
6
|
+
extend Helpers
|
7
|
+
|
8
|
+
##
|
9
|
+
# Remove the characters that match the given properties.
|
10
|
+
#
|
11
|
+
# For additional information see the Normatron::Filters::KeepFilter documentation.
|
12
|
+
#
|
13
|
+
# @example
|
14
|
+
# RemoveFilter.evaluate("Quake 3", :L) #=> " 3" remove only letters
|
15
|
+
# RemoveFilter.evaluate("Quake 3", :N) #=> "Quake " remove only numbers
|
16
|
+
# RemoveFilter.evaluate("Quake 3", :L, :N) #=> " " remove only letters or numbers
|
17
|
+
# RemoveFilter.evaluate("Quake 3", :Lu, :N) #=> "uake " remove only uppercased letters or numbers
|
18
|
+
# RemoveFilter.evaluate("Quake ˩", :Latin) #=> " ˩" remove only latin characters
|
19
|
+
#
|
20
|
+
# @example Using as ActiveRecord::Base normalizer
|
21
|
+
# normalize :attribute_a, :with => [[:remove, :Lu]]
|
22
|
+
# normalize :attribute_b, :with => [{:remove =>[:Lu]}]
|
23
|
+
# normalize :attribute_c, :with => [:custom_filter, [:remove, :Ll, :Space]]
|
24
|
+
# normalize :attribute_d, :with => [:custom_filter, {:remove => [:Ll, :Space]}]
|
25
|
+
#
|
26
|
+
# @param [String] input A character sequence
|
27
|
+
# @param [[Symbol]*] properties Array of Symbols equivalent to Regexp property for \\p{} construct.
|
28
|
+
# @return [String] The clean character sequence or the object itself
|
29
|
+
# @see http://www.ruby-doc.org/core-1.9.3/Regexp.html Regexp
|
30
|
+
# @see KeepFilter Normatron::Filters::KeepFilter
|
31
|
+
# @todo Raise exception for empty properties
|
32
|
+
def self.evaluate(input, *properties)
  # Non-String objects are returned unchanged.
  return input unless input.kind_of?(String)

  # :remove tells the shared helper to delete every character that matches.
  evaluate_regexp(input, :remove, properties)
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Normatron
|
2
|
+
module Filters
|
3
|
+
module SqueezeFilter
|
4
|
+
|
5
|
+
##
|
6
|
+
# Remove multiple occurrences of the same character.
|
7
|
+
# If no options are given, all runs of identical characters are replaced by a single character.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# SqueezeFilter.evaluate("yellow moon") #=> "yelow mon"
|
11
|
+
# SqueezeFilter.evaluate(" now is the", " ") #=> " now is the"
|
12
|
+
# SqueezeFilter.evaluate("putters shoot balls", "m-z") #=> "puters shot balls"
|
13
|
+
#
|
14
|
+
# @example Using as ActiveRecord::Base normalizer
|
15
|
+
# normalize :attribute_a, :with => [:custom_filter, :squeeze]
|
16
|
+
# normalize :attribute_b, :with => [:custom_filter, [:squeeze, "a-f"]]
|
17
|
+
# normalize :attribute_c, :with => [:custom_filter, {:squeeze => ["a-f"]}]
|
18
|
+
#
|
19
|
+
# @param [String] input A character sequence
|
20
|
+
# @param [[String]*] targets Characters to be affected
|
21
|
+
# @return [String] The clean character sequence or the object itself
|
22
|
+
# @see http://www.ruby-doc.org/core-1.9.3/String.html#method-i-squeeze String#squeeze
|
23
|
+
# @see SquishFilter Normatron::Filters::SquishFilter
|
24
|
+
def self.evaluate(input, *targets)
  # Non-String objects are returned unchanged.
  return input unless input.kind_of?(String)

  # Without a usable target every run of repeated characters is squeezed.
  return input.squeeze unless targets.any?

  # NOTE(review): only the last target is applied; earlier ones are ignored -
  # confirm this is intended, since String#squeeze accepts several selectors.
  input.squeeze(targets.last)
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|