string_normalizr 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ Copyright (c) 2010, Carsten Zimmermann
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+ * Redistributions of source code must retain the above copyright
7
+ notice, this list of conditions and the following disclaimer.
8
+ * Redistributions in binary form must reproduce the above copyright
9
+ notice, this list of conditions and the following disclaimer in the
10
+ documentation and/or other materials provided with the distribution.
11
+ * Neither the name of the original author / copyright holder nor the
12
+ names of its contributors may be used to endorse or promote products
13
+ derived from this software without specific prior written permission.
14
+
15
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/Manifest CHANGED
@@ -1,8 +1,8 @@
1
+ LICENSE
1
2
  README.rdoc
2
3
  Rakefile
3
4
  init.rb
4
5
  lib/string_normalizr.rb
5
6
  nbproject/private/rake-d.txt
6
- string_normalizr.gemspec
7
7
  test/string_normalizr_test.rb
8
8
  Manifest
data/README.rdoc CHANGED
@@ -6,7 +6,7 @@ of your String instance.
6
6
 
7
7
 
8
8
  === Installation
9
- $ gem install carpodaster-string_normalizr --source http://gems.github.com
9
+ $ gem install string_normalizr
10
10
 
11
11
  Or use it as a plugin with your Rails app:
12
12
 
@@ -15,12 +15,14 @@ Or use it as a plugin with your Rails app:
15
15
  === Usage
16
16
  "hellö world".normalize
17
17
 
18
- === Caveats
19
- * Tests seem to have difficulties with multibyte characters right now.
20
-
21
18
  === .plan
22
- * Add option-hash to customize the normalization
19
+ * support custom collation hash
20
+ * support more special chars by default
21
+ * handle punctuation marks
23
22
 
23
+ === Changelog
24
+ * 0.2: Normalization can now be customized via an options hash.
25
+ * 0.1: Initial version
24
26
 
25
27
  ---
26
28
 
data/Rakefile CHANGED
@@ -9,7 +9,7 @@ Rake::TestTask.new("test") do |t|
9
9
  t.verbose = false
10
10
  end
11
11
 
12
- Echoe.new('string_normalizr', '0.1') do |p|
12
+ Echoe.new('string_normalizr', '0.2') do |p|
13
13
  p.description = "Let String instances be conviently normalized"
14
14
  p.url = "http://github.com/carpodaster/string_normalizr"
15
15
  p.author = "Carsten Zimmermann"
@@ -1,20 +1,110 @@
1
- module AegisNet
1
+ # -*- coding: utf-8 -*-
2
+ require "rubygems"
3
+ module AegisNet # :nodoc:
2
4
  module StringNormalizr
3
5
 
6
+ COLLATION = {
7
+ 'Ä' => 'Ae',
8
+ 'Æ' => 'Ae',
9
+ 'Å' => 'A',
10
+ 'À' => 'A',
11
+ 'Á' => 'A',
12
+ 'Â' => 'A',
13
+ 'Ç' => 'C',
14
+ 'È' => "E",
15
+ 'É' => "E",
16
+ 'Ê' => "E",
17
+ 'Ë' => 'E',
18
+ 'Í' => 'I',
19
+ 'Ì' => 'I',
20
+ 'Î' => 'I',
21
+ 'Ï' => 'I',
22
+ 'Ñ' => 'N',
23
+ 'Ö' => 'Oe',
24
+ 'Œ' => 'Oe',
25
+ 'Ø' => 'O',
26
+ 'Ô' => 'O',
27
+ 'Ó' => 'O',
28
+ 'Ò' => 'O',
29
+ 'Ü' => 'Ue',
30
+ 'Ú' => 'U',
31
+ 'Ù' => 'U',
32
+ 'Ÿ' => 'Y',
33
+ 'ä' => 'ae',
34
+ 'æ' => 'ae',
35
+ 'å' => 'a',
36
+ 'à' => 'a',
37
+ 'á' => 'a',
38
+ 'â' => 'a',
39
+ 'ç' => 'c',
40
+ 'è' => 'e',
41
+ 'é' => 'e',
42
+ 'ê' => 'e',
43
+ 'ë' => 'e',
44
+ 'í' => 'i',
45
+ 'ì' => 'i',
46
+ 'î' => 'i',
47
+ 'ï' => 'i',
48
+ 'ñ' => 'n',
49
+ 'ö' => 'oe',
50
+ 'œ' => 'oe',
51
+ 'ø' => 'o',
52
+ 'ô' => 'o',
53
+ 'ó' => 'o',
54
+ 'ò' => 'o',
55
+ 'ü' => 'ue',
56
+ 'ú' => 'u',
57
+ 'ù' => 'u',
58
+ 'ÿ' => 'y',
59
+ 'ß' => 'ss',
60
+ }
61
+
4
62
  def self.included(base)
5
63
  base.send(:include, InstanceMethods)
6
64
  end
7
65
 
8
-
9
66
  module InstanceMethods
10
- def normalize
11
- self.strip.gsub('ä', 'ae').gsub('ö', 'oe').gsub('ü', 'ue').gsub('Ä', 'Ae').gsub('Ö', 'Oe').gsub('Ü', 'Ue').gsub("ß", "ss")
67
+
68
+ # Returns a new String based on pre-defined normalization rules
69
+ #
70
+ # == Parameters
71
+ # * +options+: optional Hash for normalization customization
72
+ #
73
+ # == Available options
74
+ # * <tt>:strip</tt> - trim leading and trailing whitespaces (true|false, default: true)
75
+ # * <tt>:replace_whitespaces</tt> - replace whitespaces within the string with +str+
76
+ # or set to +false+ to leave whitespaces alone. Makes little
77
+ # sense w/o :strip => true (str|false, default: "-")
78
+ #
79
+ # == Examples
80
+ # "This is án exåmple".normalize
81
+ # => "This-is-an-example"
82
+ #
83
+ # "Tëst string with träiling whitespaces ".normalize(:replace_whitespaces => false)
84
+ # => "Test string with traeiling whitespaces"
85
+ #
86
+ def normalize(options = {})
87
+ # shamelessly taken from ActiveSupport::CoreExtensions::Hash::Keys#assert_valid_keys
88
+ valid_keys = [:replace_whitespaces, :strip]
89
+ unknown_keys = options.keys - [valid_keys].flatten
90
+ raise(ArgumentError, "Unknown key(s): #{unknown_keys.join(", ")}") unless unknown_keys.empty?
91
+
92
+ # Default options
93
+ options = {
94
+ :downcase => false,
95
+ :strip => true,
96
+ :replace_whitespaces => "-"
97
+ }.merge(options)
98
+
99
+ n_str = AegisNet::StringNormalizr::COLLATION.inject(dup) {|str, (collate_from, collate_to)| str.gsub(collate_from, collate_to)}
100
+ n_str.strip! if options[:strip]
101
+ n_str.gsub!(/\s+/, options[:replace_whitespaces]) if options[:replace_whitespaces]
102
+ n_str
12
103
  end
13
104
  end
14
-
15
105
  end
16
106
  end
17
107
 
18
- class String
108
+ class String # :nodoc:
19
109
  include AegisNet::StringNormalizr
20
110
  end
@@ -2,15 +2,15 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{string_normalizr}
5
- s.version = "0.1"
5
+ s.version = "0.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Carsten Zimmermann"]
9
- s.date = %q{2010-09-17}
9
+ s.date = %q{2010-09-20}
10
10
  s.description = %q{Let String instances be conviently normalized}
11
11
  s.email = %q{carp@hacksocke.de}
12
- s.extra_rdoc_files = ["README.rdoc", "lib/string_normalizr.rb"]
13
- s.files = ["README.rdoc", "Rakefile", "init.rb", "lib/string_normalizr.rb", "nbproject/private/rake-d.txt", "string_normalizr.gemspec", "test/string_normalizr_test.rb", "Manifest"]
12
+ s.extra_rdoc_files = ["LICENSE", "README.rdoc", "lib/string_normalizr.rb"]
13
+ s.files = ["LICENSE", "README.rdoc", "Rakefile", "init.rb", "lib/string_normalizr.rb", "nbproject/private/rake-d.txt", "test/string_normalizr_test.rb", "Manifest", "string_normalizr.gemspec"]
14
14
  s.homepage = %q{http://github.com/carpodaster/string_normalizr}
15
15
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "String_normalizr", "--main", "README.rdoc"]
16
16
  s.require_paths = ["lib"]
@@ -1,22 +1,89 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/unit'
2
3
  require File.dirname(__FILE__) + "/../lib/string_normalizr"
3
4
 
4
5
  class StringNormalizrTest < Test::Unit::TestCase
5
-
6
+
6
7
  def test_string_integration
7
8
  assert "some string".respond_to?(:normalize)
8
9
  end
9
10
 
10
11
  def test_whitespaces
12
+ assert_equal "This-is-an-example", "This is an example".normalize
13
+ assert_equal "This is an example", "This is an example".normalize(:replace_whitespaces => false)
14
+ assert_equal "This=is=an=example", "This is an example".normalize(:replace_whitespaces => "=")
15
+
11
16
  assert_equal "foo", " foo \n \t".normalize
17
+ assert_equal "foo \n \t", "foo \n \t".normalize(:strip => false, :replace_whitespaces => false)
18
+ end
19
+
20
+ def test_accents
21
+ assert_equal "a", "á".normalize
22
+ assert_equal "a", "à".normalize
23
+ assert_equal "a", "â".normalize
24
+ assert_equal "A", "Á".normalize
25
+ assert_equal "A", "À".normalize
26
+ assert_equal "A", "Â".normalize
27
+ assert_equal "e", "é".normalize
28
+ assert_equal "e", "è".normalize
29
+ assert_equal "E", "É".normalize
30
+ assert_equal "E", "È".normalize
31
+ assert_equal "i", "í".normalize
32
+ assert_equal "i", "ì".normalize
33
+ assert_equal "i", "î".normalize
34
+ assert_equal "I", "Í".normalize
35
+ assert_equal "I", "Ì".normalize
36
+ assert_equal "I", "Î".normalize
37
+ assert_equal "o", "ó".normalize
38
+ assert_equal "o", "ò".normalize
39
+ assert_equal "o", "ô".normalize
40
+ assert_equal "O", "Ó".normalize
41
+ assert_equal "O", "Ò".normalize
42
+ assert_equal "O", "Ô".normalize
43
+ assert_equal "u", "ú".normalize
44
+ assert_equal "u", "ù".normalize
45
+ assert_equal "U", "Ú".normalize
46
+ assert_equal "U", "Ù".normalize
47
+ end
48
+
49
+ def test_umlauts
50
+ assert_equal "ae", "ä".normalize
51
+ assert_equal "Ae", "Ä".normalize
52
+ assert_equal "oe", "ö".normalize
53
+ assert_equal "Oe", "Ö".normalize
54
+ assert_equal "ue", "ü".normalize
55
+ assert_equal "Ue", "Ü".normalize
56
+ end
57
+
58
+ def test_spanish_chars
59
+ assert_equal "c", "ç".normalize
60
+ assert_equal "C", "Ç".normalize
61
+ assert_equal "n", "ñ".normalize
62
+ assert_equal "N", "Ñ".normalize
63
+ end
64
+
65
+ def test_scandinavian_chars
66
+ assert_equal "a", "å".normalize
67
+ assert_equal "A", "Å".normalize
68
+ assert_equal "o", "ø".normalize
69
+ assert_equal "O", "Ø".normalize
12
70
  end
13
71
 
14
- def test_german_umlauts
15
- assert_equal "foeOebaerueghbAerUegh", "föÖbärüghbÄrÜgh"
72
+ def test_ligatures
73
+ assert_equal "ae", "æ".normalize
74
+ assert_equal "Ae", "Æ".normalize
75
+ assert_equal "oe", "œ".normalize
76
+ assert_equal 'Oe', 'Œ'.normalize
77
+ assert_equal "ss", "ß".normalize
16
78
  end
17
79
 
18
- def test_sz
19
- assert_equal "Strasse", "Straße"
80
+ def test_diaresises
81
+ assert_equal "e", "ë".normalize
82
+ assert_equal "E", "Ë".normalize
83
+ assert_equal "i", "ï".normalize
84
+ assert_equal "I", "Ï".normalize
85
+ assert_equal "y", "ÿ".normalize
86
+ assert_equal "Y", "Ÿ".normalize
20
87
  end
21
88
 
22
89
  end
metadata CHANGED
@@ -1,12 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_normalizr
3
3
  version: !ruby/object:Gem::Version
4
- hash: 9
4
+ hash: 15
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 1
9
- version: "0.1"
8
+ - 2
9
+ version: "0.2"
10
10
  platform: ruby
11
11
  authors:
12
12
  - Carsten Zimmermann
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-09-17 00:00:00 +02:00
17
+ date: 2010-09-20 00:00:00 +02:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -25,17 +25,19 @@ executables: []
25
25
  extensions: []
26
26
 
27
27
  extra_rdoc_files:
28
+ - LICENSE
28
29
  - README.rdoc
29
30
  - lib/string_normalizr.rb
30
31
  files:
32
+ - LICENSE
31
33
  - README.rdoc
32
34
  - Rakefile
33
35
  - init.rb
34
36
  - lib/string_normalizr.rb
35
37
  - nbproject/private/rake-d.txt
36
- - string_normalizr.gemspec
37
38
  - test/string_normalizr_test.rb
38
39
  - Manifest
40
+ - string_normalizr.gemspec
39
41
  has_rdoc: true
40
42
  homepage: http://github.com/carpodaster/string_normalizr
41
43
  licenses: []