string_normalizr 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ Copyright (c) 2010, Carsten Zimmermann
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+ * Redistributions of source code must retain the above copyright
7
+ notice, this list of conditions and the following disclaimer.
8
+ * Redistributions in binary form must reproduce the above copyright
9
+ notice, this list of conditions and the following disclaimer in the
10
+ documentation and/or other materials provided with the distribution.
11
+ * Neither the name of the original author / copyright holder nor the
12
+ names of its contributors may be used to endorse or promote products
13
+ derived from this software without specific prior written permission.
14
+
15
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/Manifest CHANGED
@@ -1,8 +1,8 @@
1
+ LICENSE
1
2
  README.rdoc
2
3
  Rakefile
3
4
  init.rb
4
5
  lib/string_normalizr.rb
5
6
  nbproject/private/rake-d.txt
6
- string_normalizr.gemspec
7
7
  test/string_normalizr_test.rb
8
8
  Manifest
data/README.rdoc CHANGED
@@ -6,7 +6,7 @@ of your String instance.
6
6
 
7
7
 
8
8
  === Installation
9
- $ gem install carpodaster-string_normalizr --source http://gems.github.com
9
+ $ gem install string_normalizr
10
10
 
11
11
  Or use it as a plugin with your Rails app:
12
12
 
@@ -15,12 +15,14 @@ Or use it as a plugin with your Rails app:
15
15
  === Usage
16
16
  "hellö world".normalize
17
17
 
18
- === Caveats
19
- * Tests seem to have difficulties with multibyte characters right now.
20
-
21
18
  === .plan
22
- * Add option-hash to customize the normalization
19
+ * support custom collation hash
20
+ * support more special chars by default
21
+ * handle punctuation marks
23
22
 
23
+ === Changelog
24
+ * 0.2: Normalization can now be customized via an options hash.
25
+ * 0.1: Initial version
24
26
 
25
27
  ---
26
28
 
data/Rakefile CHANGED
@@ -9,7 +9,7 @@ Rake::TestTask.new("test") do |t|
9
9
  t.verbose = false
10
10
  end
11
11
 
12
- Echoe.new('string_normalizr', '0.1') do |p|
12
+ Echoe.new('string_normalizr', '0.2') do |p|
13
13
  p.description = "Let String instances be conviently normalized"
14
14
  p.url = "http://github.com/carpodaster/string_normalizr"
15
15
  p.author = "Carsten Zimmermann"
@@ -1,20 +1,110 @@
1
- module AegisNet
1
+ # -*- coding: utf-8 -*-
2
+ require "rubygems"
3
+ module AegisNet # :nodoc:
2
4
  module StringNormalizr
3
5
 
6
+ COLLATION = {
7
+ 'Ä' => 'Ae',
8
+ 'Æ' => 'Ae',
9
+ 'Å' => 'A',
10
+ 'À' => 'A',
11
+ 'Á' => 'A',
12
+ 'Â' => 'A',
13
+ 'Ç' => 'C',
14
+ 'È' => "E",
15
+ 'É' => "E",
16
+ 'Ê' => "E",
17
+ 'Ë' => 'E',
18
+ 'Í' => 'I',
19
+ 'Ì' => 'I',
20
+ 'Î' => 'I',
21
+ 'Ï' => 'I',
22
+ 'Ñ' => 'N',
23
+ 'Ö' => 'Oe',
24
+ 'Œ' => 'Oe',
25
+ 'Ø' => 'O',
26
+ 'Ô' => 'O',
27
+ 'Ó' => 'O',
28
+ 'Ò' => 'O',
29
+ 'Ü' => 'Ue',
30
+ 'Ú' => 'U',
31
+ 'Ù' => 'U',
32
+ 'Ÿ' => 'Y',
33
+ 'ä' => 'ae',
34
+ 'æ' => 'ae',
35
+ 'å' => 'a',
36
+ 'à' => 'a',
37
+ 'á' => 'a',
38
+ 'â' => 'a',
39
+ 'ç' => 'c',
40
+ 'è' => 'e',
41
+ 'é' => 'e',
42
+ 'ê' => 'e',
43
+ 'ë' => 'e',
44
+ 'í' => 'i',
45
+ 'ì' => 'i',
46
+ 'î' => 'i',
47
+ 'ï' => 'i',
48
+ 'ñ' => 'n',
49
+ 'ö' => 'oe',
50
+ 'œ' => 'oe',
51
+ 'ø' => 'o',
52
+ 'ô' => 'o',
53
+ 'ó' => 'o',
54
+ 'ò' => 'o',
55
+ 'ü' => 'ue',
56
+ 'ú' => 'u',
57
+ 'ù' => 'u',
58
+ 'ÿ' => 'y',
59
+ 'ß' => 'ss',
60
+ }
61
+
4
62
  def self.included(base)
5
63
  base.send(:include, InstanceMethods)
6
64
  end
7
65
 
8
-
9
66
  module InstanceMethods
10
- def normalize
11
- self.strip.gsub('ä', 'ae').gsub('ö', 'oe').gsub('ü', 'ue').gsub('Ä', 'Ae').gsub('Ö', 'Oe').gsub('Ü', 'Ue').gsub("ß", "ss")
67
+
68
+ # Returns a new String based on pre-defined normalization rules
69
+ #
70
+ # == Parameters
71
+ # * +options+: optional Hash for normalization customization
72
+ #
73
+ # == Available options
74
+ # * <tt>:strip</tt> - trim leading and trailing whitespaces (true|false, default: true)
75
+ # * <tt>:replace_whitespaces</tt> - replace whitespaces within the string with +str+
76
+ # or set to +false+ to leave whitespaces alone. Makes little
77
+ # sense w/o :strip => true (str|false, default: "-")
78
+ #
79
+ # == Examples
80
+ # "This is án exåmple".normalize
81
+ # => "This-is-an-example"
82
+ #
83
+ # "Tëst string with träiling whitespaces ".normalize(:replace_whitespaces => false)
84
+ # => "Test string with traeiling whitespaces"
85
+ #
86
+ def normalize(options = {})
87
+ # shamelessly taken from ActiveSupport::CoreExtensions::Hash::Keys#assert_valid_keys
88
+ valid_keys = [:replace_whitespaces, :strip]
89
+ unknown_keys = options.keys - [valid_keys].flatten
90
+ raise(ArgumentError, "Unknown key(s): #{unknown_keys.join(", ")}") unless unknown_keys.empty?
91
+
92
+ # Default options
93
+ options = {
94
+ :downcase => false,
95
+ :strip => true,
96
+ :replace_whitespaces => "-"
97
+ }.merge(options)
98
+
99
+ n_str = AegisNet::StringNormalizr::COLLATION.inject(dup) {|str, (collate_from, collate_to)| str.gsub(collate_from, collate_to)}
100
+ n_str.strip! if options[:strip]
101
+ n_str.gsub!(/\s+/, options[:replace_whitespaces]) if options[:replace_whitespaces]
102
+ n_str
12
103
  end
13
104
  end
14
-
15
105
  end
16
106
  end
17
107
 
18
- class String
108
+ class String # :nodoc:
19
109
  include AegisNet::StringNormalizr
20
110
  end
@@ -2,15 +2,15 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{string_normalizr}
5
- s.version = "0.1"
5
+ s.version = "0.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Carsten Zimmermann"]
9
- s.date = %q{2010-09-17}
9
+ s.date = %q{2010-09-20}
10
10
  s.description = %q{Let String instances be conviently normalized}
11
11
  s.email = %q{carp@hacksocke.de}
12
- s.extra_rdoc_files = ["README.rdoc", "lib/string_normalizr.rb"]
13
- s.files = ["README.rdoc", "Rakefile", "init.rb", "lib/string_normalizr.rb", "nbproject/private/rake-d.txt", "string_normalizr.gemspec", "test/string_normalizr_test.rb", "Manifest"]
12
+ s.extra_rdoc_files = ["LICENSE", "README.rdoc", "lib/string_normalizr.rb"]
13
+ s.files = ["LICENSE", "README.rdoc", "Rakefile", "init.rb", "lib/string_normalizr.rb", "nbproject/private/rake-d.txt", "test/string_normalizr_test.rb", "Manifest", "string_normalizr.gemspec"]
14
14
  s.homepage = %q{http://github.com/carpodaster/string_normalizr}
15
15
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "String_normalizr", "--main", "README.rdoc"]
16
16
  s.require_paths = ["lib"]
@@ -1,22 +1,89 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/unit'
2
3
  require File.dirname(__FILE__) + "/../lib/string_normalizr"
3
4
 
4
5
  class StringNormalizrTest < Test::Unit::TestCase
5
-
6
+
6
7
  def test_string_integration
7
8
  assert "some string".respond_to?(:normalize)
8
9
  end
9
10
 
10
11
  def test_whitespaces
12
+ assert_equal "This-is-an-example", "This is an example".normalize
13
+ assert_equal "This is an example", "This is an example".normalize(:replace_whitespaces => false)
14
+ assert_equal "This=is=an=example", "This is an example".normalize(:replace_whitespaces => "=")
15
+
11
16
  assert_equal "foo", " foo \n \t".normalize
17
+ assert_equal "foo \n \t", "foo \n \t".normalize(:strip => false, :replace_whitespaces => false)
18
+ end
19
+
20
+ def test_accents
21
+ assert_equal "a", "á".normalize
22
+ assert_equal "a", "à".normalize
23
+ assert_equal "a", "â".normalize
24
+ assert_equal "A", "Á".normalize
25
+ assert_equal "A", "À".normalize
26
+ assert_equal "A", "Â".normalize
27
+ assert_equal "e", "é".normalize
28
+ assert_equal "e", "è".normalize
29
+ assert_equal "E", "É".normalize
30
+ assert_equal "E", "È".normalize
31
+ assert_equal "i", "í".normalize
32
+ assert_equal "i", "ì".normalize
33
+ assert_equal "i", "î".normalize
34
+ assert_equal "I", "Í".normalize
35
+ assert_equal "I", "Ì".normalize
36
+ assert_equal "I", "Î".normalize
37
+ assert_equal "o", "ó".normalize
38
+ assert_equal "o", "ò".normalize
39
+ assert_equal "o", "ô".normalize
40
+ assert_equal "O", "Ó".normalize
41
+ assert_equal "O", "Ò".normalize
42
+ assert_equal "O", "Ô".normalize
43
+ assert_equal "u", "ú".normalize
44
+ assert_equal "u", "ù".normalize
45
+ assert_equal "U", "Ú".normalize
46
+ assert_equal "U", "Ù".normalize
47
+ end
48
+
49
+ def test_umlauts
50
+ assert_equal "ae", "ä".normalize
51
+ assert_equal "Ae", "Ä".normalize
52
+ assert_equal "oe", "ö".normalize
53
+ assert_equal "Oe", "Ö".normalize
54
+ assert_equal "ue", "ü".normalize
55
+ assert_equal "Ue", "Ü".normalize
56
+ end
57
+
58
+ def test_spanish_chars
59
+ assert_equal "c", "ç".normalize
60
+ assert_equal "C", "Ç".normalize
61
+ assert_equal "n", "ñ".normalize
62
+ assert_equal "N", "Ñ".normalize
63
+ end
64
+
65
+ def test_scandinavian_chars
66
+ assert_equal "a", "å".normalize
67
+ assert_equal "A", "Å".normalize
68
+ assert_equal "o", "ø".normalize
69
+ assert_equal "O", "Ø".normalize
12
70
  end
13
71
 
14
- def test_german_umlauts
15
- assert_equal "foeOebaerueghbAerUegh", "föÖbärüghbÄrÜgh"
72
+ def test_ligatures
73
+ assert_equal "ae", "æ".normalize
74
+ assert_equal "Ae", "Æ".normalize
75
+ assert_equal "oe", "œ".normalize
76
+ assert_equal 'Oe', 'Œ'.normalize
77
+ assert_equal "ss", "ß".normalize
16
78
  end
17
79
 
18
- def test_sz
19
- assert_equal "Strasse", "Straße"
80
+ def test_diaresises
81
+ assert_equal "e", "ë".normalize
82
+ assert_equal "E", "Ë".normalize
83
+ assert_equal "i", "ï".normalize
84
+ assert_equal "I", "Ï".normalize
85
+ assert_equal "y", "ÿ".normalize
86
+ assert_equal "Y", "Ÿ".normalize
20
87
  end
21
88
 
22
89
  end
metadata CHANGED
@@ -1,12 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_normalizr
3
3
  version: !ruby/object:Gem::Version
4
- hash: 9
4
+ hash: 15
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 1
9
- version: "0.1"
8
+ - 2
9
+ version: "0.2"
10
10
  platform: ruby
11
11
  authors:
12
12
  - Carsten Zimmermann
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-09-17 00:00:00 +02:00
17
+ date: 2010-09-20 00:00:00 +02:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -25,17 +25,19 @@ executables: []
25
25
  extensions: []
26
26
 
27
27
  extra_rdoc_files:
28
+ - LICENSE
28
29
  - README.rdoc
29
30
  - lib/string_normalizr.rb
30
31
  files:
32
+ - LICENSE
31
33
  - README.rdoc
32
34
  - Rakefile
33
35
  - init.rb
34
36
  - lib/string_normalizr.rb
35
37
  - nbproject/private/rake-d.txt
36
- - string_normalizr.gemspec
37
38
  - test/string_normalizr_test.rb
38
39
  - Manifest
40
+ - string_normalizr.gemspec
39
41
  has_rdoc: true
40
42
  homepage: http://github.com/carpodaster/string_normalizr
41
43
  licenses: []