urlify 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
1
+ URLify
2
+ ======
3
+
4
+ A tiny library to convert diacritical marks to unaccented equivalents, for
5
+ ASCII-safe URI creation. It also includes a utility method to remove subtitles.
6
+
7
+
8
+ Installation
9
+ ------------
10
+
11
+ sudo gem install urlify
12
+
13
+ URLify is available from [Gemcutter][gc] and in source form on [GitHub][gh].
14
+
15
+ [gc]: http://gemcutter.org/gems/urlify
16
+ [gh]: http://github.com/ionfish/urlify
17
+
18
+
19
+ API
20
+ ---
21
+
22
+ URLify.deaccentuate "Kurt Gödel" # => "Kurt Godel"
23
+
24
+ URLify.strip_subtitle "Begriffsschrift:
25
+ eine der arithmetischen nachgebildete
26
+ Formelsprache des reinen Denkens" # => "Begriffsschrift"
27
+
28
+ URLify.urlify "Über Sinn und Bedeutung" # => "uber_sinn_und_bedeutung"
29
+
30
+ URLify.urlify "Moses Schönfinkel", "-" # => "moses-schonfinkel"
31
+
32
+ The `URLify` module may be mixed into the `String` class to add the above class
33
+ methods--`deaccentuate`, `strip_subtitle` and `urlify`--as instance methods on
34
+ the `String` class. It is not mixed in by default, for obvious reasons.
35
+
36
+ class String
37
+ include URLify
38
+ end
39
+
40
+ "Grundzüge der theoretischen Logik".urlify
41
+ # => "grundzuge_der_theoretischen_logik"
42
+
43
+ Please note that the `urlify` method removes every character that is not a
44
+ lowercase letter, a digit, `_`, `-` or `+`, and only characters in URLify's accent library will be
45
+ replaced by their ASCII counterparts. If the library doesn't include a
46
+ particular conversion, please consider forking the project and adding it.
47
+
48
+
49
+ Licence
50
+ -------
51
+
52
+ Copyright (c) 2009, Benedict Eastaugh. All rights reserved.
53
+
54
+ Redistribution and use in source and binary forms, with or without
55
+ modification, are permitted provided that the following conditions are met:
56
+
57
+ * Redistributions of source code must retain the above copyright notice, this
58
+ list of conditions and the following disclaimer.
59
+ * Redistributions in binary form must reproduce the above copyright notice,
60
+ this list of conditions and the following disclaimer in the documentation
61
+ and/or other materials provided with the distribution.
62
+ * The name of the author may not be used to endorse or promote products
63
+ derived from this software without specific prior written permission.
64
+
65
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
66
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
67
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
68
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
69
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
70
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
71
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
72
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
73
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
74
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,32 @@
1
# Load the library through an absolute path. Since Ruby 1.9.2 the current
# directory is no longer on $LOAD_PATH, so a bare require 'lib/urlify' raises
# LoadError. The test files loaded below do not require the library
# themselves, so it has to be loaded here.
require File.expand_path('lib/urlify', File.dirname(__FILE__))

# Gem packaging tasks are optional: when Jeweler is missing, print install
# instructions and carry on so the test task below stays usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |s|
    s.name = "urlify"
    s.summary = "Convert accented characters to their ASCII equivalents"
    s.email = "benedict@eastaugh.net"
    s.homepage = "http://ionfish.github.com/urlify/"
    s.description = "A small library for converting accented characters " +
                    "to their ASCII equivalents."
    s.authors = ["Benedict Eastaugh"]
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install " +
       "technicalpickles-jeweler -s http://gems.github.com"
end

task :default => :test

desc "Run the URLify test suite"
task :test do
  require 'test/unit'

  testdir = "test"
  Dir.foreach(testdir) do |f|
    path = "#{testdir}/#{f}"
    # The dot is escaped and the pattern is anchored with \z so that only
    # regular files whose names really end in "_test.rb" are loaded.
    if File.ftype(path) == "file" && File.basename(f) =~ /_test\.rb\z/
      load path
    end
  end
end
@@ -0,0 +1,4 @@
1
+ ---
2
+ :minor: 2
3
+ :patch: 0
4
+ :major: 0
@@ -0,0 +1,66 @@
1
+ # encoding: UTF-8
2
+
3
module URLify

  # Directory holding the library's support files; the accent map lives there.
  URLIFY_PATH = File.expand_path(File.dirname(__FILE__)) + '/urlify/'
  require URLIFY_PATH + 'accents'

  # Converts an input string into a URL-safe string.
  #
  # * Leading and trailing whitespace is removed.
  # * Everything after the first colon (the subtitle) is dropped.
  # * Characters found in ACCENTMAP are replaced by their ASCII equivalents.
  # * All letters are converted to lower case.
  # * Remaining whitespace is replaced with separators.
  # * Any remaining character which is not a lowercase letter, a digit, an
  #   underscore, a dash or a plus sign is removed.
  #
  # Only underscores, dashes, plus signs and the empty string are allowed as
  # separators, although combinations are permitted, so "_", "--", "+_-" and ""
  # are all valid separators. Any other separator is replaced by "_".
  #
  # Returns a new string; an empty or whitespace-only input yields "".
  def self.urlify(string, separator = "_")
    # \A and \z anchor the whole string. With ^ and $ a multi-line separator
    # such as "\nabc" passed validation because its first line is empty, and
    # its letters then leaked into the result.
    unless separator =~ /\A[-_+]*\z/
      separator = "_"
    end

    deaccentuate(strip_subtitle(string.strip)).
      downcase.
      gsub(/\s/, separator).
      gsub(/[^a-z\d_\-+]/, "")
  end

  # Removes everything from a string after the first colon, along with any
  # whitespace surrounding that colon.
  #
  # Ensures that titles with really long subtitles don't convert to equally
  # long permalinks.
  #
  # Returns the part before the first colon, or "" when there is none (empty
  # input, or input that starts with a colon).
  def self.strip_subtitle(string)
    # Only the first piece is needed, so limit the split to two parts. With
    # no limit, "" and ":" both split into an empty array whose first element
    # is nil; the limit plus the fallback keeps the result a String.
    string.split(/\s*:\s*/, 2).first || ""
  end

  # Replaces each character that has an entry in ACCENTMAP with its ASCII
  # equivalent; every other character is passed through untouched.
  #
  # So a lowercase 'u' with an umlaut, ü, becomes u, while an uppercase 'A'
  # with an acute accent, Á, becomes A. This method is UTF-8 safe.
  def self.deaccentuate(string)
    # Ruby 1.8 has no character-aware String#chars, so split on the UTF-8
    # aware empty pattern there instead.
    (RUBY_VERSION >= "1.9.0" ? string.chars : string.split(//u)).map {|c|
      ACCENTMAP[c] || c
    }.join("")
  end

  # Instance method version of URLify.urlify, so that the library can be used
  # as a mixin for the String class.
  def urlify(separator = "_")
    URLify.urlify(self, separator)
  end

  # Instance method version of URLify.strip_subtitle, so that the library can
  # be used as a mixin for the String class.
  def strip_subtitle
    URLify.strip_subtitle(self)
  end

  # Instance method version of URLify.deaccentuate, so that the library can be
  # used as a mixin for the String class.
  def deaccentuate
    URLify.deaccentuate(self)
  end
end
@@ -0,0 +1,79 @@
1
+ # encoding: UTF-8
2
+
3
module URLify

  # Maps single characters to their ASCII replacement strings. A replacement
  # may be longer than one character ('Æ' => 'AE', '&' => 'and').
  #
  # Every lowercase letter listed here also has its uppercase counterpart, so
  # a capitalised word converts the same way as its lowercase form.
  ACCENTMAP = {
    'À' => 'A',
    'Á' => 'A',
    'Â' => 'A',
    'Ã' => 'A',
    'Ä' => 'A',
    'Å' => 'AA',
    'Æ' => 'AE',
    'Ç' => 'C',
    'È' => 'E',
    'É' => 'E',
    'Ê' => 'E',
    'Ë' => 'E',
    'Ì' => 'I',
    'Í' => 'I',
    'Î' => 'I',
    'Ï' => 'I',
    'Ð' => 'D',
    'Ł' => 'L',
    'Ñ' => 'N',
    'Ń' => 'N',
    'Ò' => 'O',
    'Ó' => 'O',
    'Ô' => 'O',
    'Õ' => 'O',
    'Ō' => 'O',
    'Ö' => 'O',
    'Ø' => 'OE',
    'Ś' => 'S',
    'Ù' => 'U',
    'Ú' => 'U',
    'Ü' => 'U',
    'Û' => 'U',
    'Ū' => 'U',
    'Ý' => 'Y',
    'Ÿ' => 'Y',
    'Þ' => 'Th',
    'Ż' => 'Z',
    'ß' => 'ss',
    'à' => 'a',
    'á' => 'a',
    'â' => 'a',
    'ã' => 'a',
    'ä' => 'a',
    'å' => 'aa',
    'æ' => 'ae',
    'ç' => 'c',
    'è' => 'e',
    'é' => 'e',
    'ê' => 'e',
    'ë' => 'e',
    'ì' => 'i',
    'í' => 'i',
    'î' => 'i',
    'ï' => 'i',
    'ð' => 'd',
    'ł' => 'l',
    'ñ' => 'n',
    'ń' => 'n',
    'ò' => 'o',
    'ó' => 'o',
    'ô' => 'o',
    'õ' => 'o',
    'ō' => 'o',
    'ö' => 'o',
    'ø' => 'oe',
    'ś' => 's',
    'ù' => 'u',
    'ú' => 'u',
    'û' => 'u',
    'ū' => 'u',
    'ü' => 'u',
    'ý' => 'y',
    'þ' => 'th',
    'ÿ' => 'y',
    'ż' => 'z',
    'Œ' => 'OE',
    'œ' => 'oe',
    '&' => 'and'}

end
@@ -0,0 +1 @@
1
+ *.gem
@@ -0,0 +1,44 @@
1
+ # encoding: UTF-8
2
+
3
# Mix URLify into String so the instance-method forms (urlify, strip_subtitle,
# deaccentuate) can be exercised by the mixin tests.
String.class_eval do
  include URLify
end
6
+
7
# Exercises the URLify module functions and their String mixin counterparts.
class URLifyTest < Test::Unit::TestCase

  # Fixtures shared by the tests: a title with a subtitle after the colon, and
  # a name containing a character that maps to two ASCII letters.
  def setup
    @biography = "Boyd: The Fighter Pilot Who Changed the Art of War"
    @philosopher = "Søren Kierkegaard"
  end

  def test_subtitle_stripping
    title = URLify.strip_subtitle(@biography)
    assert_equal("Boyd", title)
  end

  def test_mixin_subtitle_stripping
    title = @biography.strip_subtitle
    assert_equal("Boyd", title)
  end

  def test_deaccentuation
    # Each pair is [accented input, expected ASCII output].
    cases = [
      [@philosopher, "Soeren Kierkegaard"],
      ["Tomek Bartoszyński", "Tomek Bartoszynski"],
      ["Józef Maria Bocheński", "Jozef Maria Bochenski"],
      ["Jerzy Łoś", "Jerzy Los"],
      ["Jan Łukasiewicz", "Jan Lukasiewicz"],
      ["Chaïm Perelman", "Chaim Perelman"]
    ]

    cases.each do |accented, plain|
      assert_equal(plain, URLify.deaccentuate(accented))
    end
  end

  def test_mixin_deaccentuation
    plain = @philosopher.deaccentuate
    assert_equal("Soeren Kierkegaard", plain)
  end

  def test_urlification
    expected = {
      @philosopher => "soeren_kierkegaard",
      @biography => "boyd"
    }

    expected.each do |input, slug|
      assert_equal(slug, URLify.urlify(input))
    end
  end

  def test_mixin_urlification
    expected = {
      @philosopher => "soeren_kierkegaard",
      @biography => "boyd"
    }

    expected.each do |input, slug|
      assert_equal(slug, input.urlify)
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for urlify 0.2.0: package identity, the list of packaged
# files, and RDoc/test settings.
Gem::Specification.new do |s|
  s.name = %q{urlify}
  s.version = "0.2.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Benedict Eastaugh"]
  s.date = %q{2009-10-09}
  s.description = %q{A small library for converting accented characters to their ASCII equivalents.}
  s.email = %q{benedict@eastaugh.net}
  s.extra_rdoc_files = [
    "README.md"
  ]
  s.files = [
    "README.md",
    "Rakefile",
    "VERSION.yml",
    "lib/urlify.rb",
    "lib/urlify/accents.rb",
    "pkg/.gitignore",
    "test/urlify_test.rb",
    "urlify.gemspec"
  ]
  s.homepage = %q{http://ionfish.github.com/urlify/}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Convert accented characters to their ASCII equivalents}
  s.test_files = [
    "test/urlify_test.rb"
  ]

  # The generated template also compared Gem::RubyGemsVersion against 1.2.0
  # to choose how to declare dependencies. This gem has none, so both
  # branches were empty and the check has been dropped; it only added a
  # reference to a constant that may not exist in the running RubyGems.
  s.specification_version = 3 if s.respond_to? :specification_version
end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: urlify
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Benedict Eastaugh
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-09 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A small library for converting accented characters to their ASCII equivalents.
17
+ email: benedict@eastaugh.net
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.md
24
+ files:
25
+ - README.md
26
+ - Rakefile
27
+ - VERSION.yml
28
+ - lib/urlify.rb
29
+ - lib/urlify/accents.rb
30
+ - pkg/.gitignore
31
+ - test/urlify_test.rb
32
+ - urlify.gemspec
33
+ has_rdoc: true
34
+ homepage: http://ionfish.github.com/urlify/
35
+ licenses: []
36
+
37
+ post_install_message:
38
+ rdoc_options:
39
+ - --charset=UTF-8
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ version:
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ requirements: []
55
+
56
+ rubyforge_project:
57
+ rubygems_version: 1.3.5
58
+ signing_key:
59
+ specification_version: 3
60
+ summary: Convert accented characters to their ASCII equivalents
61
+ test_files:
62
+ - test/urlify_test.rb