urlify 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,74 @@
1
+ URLify
2
+ ======
3
+
4
+ A tiny library to convert diacritical marks to unaccented equivalents, for
5
+ ASCII-safe URI creation. It also includes a utility method to remove subtitles.
6
+
7
+
8
+ Installation
9
+ ------------
10
+
11
+ sudo gem install urlify
12
+
13
+ URLify is available from [Gemcutter][gc] and in source form on [GitHub][gh].
14
+
15
+ [gc]: http://gemcutter.org/gems/urlify
16
+ [gh]: http://github.com/ionfish/urlify
17
+
18
+
19
+ API
20
+ ---
21
+
22
+ URLify.deaccentuate "Kurt Gödel" # => "Kurt Godel"
23
+
24
+ URLify.strip_subtitle "Begriffsschrift:
25
+ eine der arithmetischen nachgebildete
26
+ Formelsprache des reinen Denkens" # => "Begriffsschrift"
27
+
28
+ URLify.urlify "Über Sinn und Bedeutung" # => "uber_sinn_und_bedeutung"
29
+
30
+ URLify.urlify "Moses Schönfinkel", "-" # => "moses-schonfinkel"
31
+
32
+ The `URLify` module may be mixed into the `String` class to add the above class
33
+ methods--`deaccentuate`, `strip_subtitle` and `urlify`--as instance methods on
34
+ the `String` class. It is not mixed in by default, for obvious reasons.
35
+
36
+ class String
37
+ include URLify
38
+ end
39
+
40
+ "Grundzüge der theoretischen Logik".urlify
41
+ # => "grundzuge_der_theoretischen_logik"
42
+
43
+ Please note that any character other than `a-z`, a digit or a separator is
44
+ removed by the `urlify` method, and only characters in URLify's accent library will be
45
+ replaced by their ASCII counterparts. If the library doesn't include a
46
+ particular conversion, please consider forking the project and adding it.
47
+
48
+
49
+ Licence
50
+ -------
51
+
52
+ Copyright (c) 2009, Benedict Eastaugh. All rights reserved.
53
+
54
+ Redistribution and use in source and binary forms, with or without
55
+ modification, are permitted provided that the following conditions are met:
56
+
57
+ * Redistributions of source code must retain the above copyright notice, this
58
+ list of conditions and the following disclaimer.
59
+ * Redistributions in binary form must reproduce the above copyright notice,
60
+ this list of conditions and the following disclaimer in the documentation
61
+ and/or other materials provided with the distribution.
62
+ * The name of the author may not be used to endorse or promote products
63
+ derived from this software without specific prior written permission.
64
+
65
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
66
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
67
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
68
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
69
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
70
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
71
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
72
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
73
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
74
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,32 @@
1
# Load the library relative to this Rakefile. Since Ruby 1.9.2 the current
# directory is no longer on the load path, so a bare 'lib/urlify' is not found.
require File.expand_path('lib/urlify', File.dirname(__FILE__))

# Gem packaging tasks are provided by Jeweler when it is installed; without it
# the remaining tasks (notably :test) still work.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |s|
    s.name        = "urlify"
    s.summary     = "Convert accented characters to their ASCII equivalents"
    s.email       = "benedict@eastaugh.net"
    s.homepage    = "http://ionfish.github.com/urlify/"
    s.description = "A small library for converting accented characters " +
                    "to their ASCII equivalents."
    s.authors     = ["Benedict Eastaugh"]
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install " +
       "technicalpickles-jeweler -s http://gems.github.com"
end

task :default => :test

desc "Run the URLify test suite"
task :test do
  require 'test/unit'

  # Load every regular file in test/ whose name ends in "_test.rb". The glob
  # matches the suffix literally (the previous regexp left the dot unescaped),
  # and sorting makes the load order independent of the file system.
  testdir = "test"
  Dir.glob(File.join(testdir, "*_test.rb")).sort.each do |path|
    load path if File.file?(path)
  end
end
@@ -0,0 +1,4 @@
1
+ ---
2
+ :minor: 2
3
+ :patch: 0
4
+ :major: 0
@@ -0,0 +1,66 @@
1
+ # encoding: UTF-8
2
+
3
# Converts titles and names into ASCII-safe URL fragments.
#
# The three operations are available as module methods (URLify.urlify etc.)
# and, when the module is included into String, as instance methods.
module URLify

  URLIFY_PATH = File.expand_path(File.dirname(__FILE__)) + '/urlify/'
  require URLIFY_PATH + 'accents'

  # Converts an input string into a URL-safe string.
  #
  # * Leading and trailing whitespace is removed.
  # * Everything after the first colon (the subtitle) is dropped.
  # * Characters found in ACCENTMAP are replaced by their ASCII equivalents.
  # * All letters are converted to lower case.
  # * Each remaining whitespace character is replaced with the separator.
  # * Any remaining character which is not a-z, a digit, an underscore, a
  #   dash or a plus sign is removed.
  #
  # Only underscores, dashes, plus signs and the empty string are allowed as
  # separators, although combinations are permitted, so "_", "--", "+_-" and ""
  # are all valid separators. Any other separator falls back to "_".
  #
  # Returns the URL-safe string; an empty or colon-only input yields "".
  def self.urlify(string, separator = "_")
    # \A and \z anchor the whole string. With ^ and $ a separator such as
    # "_\nfoo" passed the check because its first line alone was valid.
    separator = "_" unless separator =~ /\A[\-_+]*\z/

    deaccentuate(strip_subtitle(string.strip)).
      downcase.
      gsub(/\s/, separator).
      gsub(/[^a-z\d_\-+]/, "")
  end

  # Removes everything from a string after the first colon, along with any
  # whitespace immediately surrounding that colon.
  #
  # Ensures that titles with really long subtitles don't convert to equally
  # long permalinks.
  #
  # Returns the text before the first colon, or "" when there is none (for
  # example for "" or ":"), so callers never receive nil.
  def self.strip_subtitle(string)
    # A limit of 2 stops splitting after the first colon; only the leading
    # piece is needed. split yields [] for an empty string, hence the fallback.
    string.split(/\s*:\s*/, 2).first || ""
  end

  # Replaces each character that has an entry in ACCENTMAP with its mapped
  # ASCII equivalent, leaving all other characters untouched.
  #
  # So a lowercase 'u' with an umlaut, ü, becomes u, while an uppercase 'A'
  # with an acute accent, Á, becomes A. This method is UTF-8 safe.
  def self.deaccentuate(string)
    # Before Ruby 1.9 the string is split with the /u regexp flag so that
    # multi-byte UTF-8 characters stay intact; later versions use String#chars.
    characters = RUBY_VERSION >= "1.9.0" ? string.chars : string.split(//u)
    characters.map { |c| ACCENTMAP[c] || c }.join("")
  end

  # Instance method version of URLify.urlify, so that the library can be used
  # as a mixin for the String class.
  def urlify(separator = "_")
    URLify.urlify(self, separator)
  end

  # Instance method version of URLify.strip_subtitle, so that the library can
  # be used as a mixin for the String class.
  def strip_subtitle
    URLify.strip_subtitle(self)
  end

  # Instance method version of URLify.deaccentuate, so that the library can be
  # used as a mixin for the String class.
  def deaccentuate
    URLify.deaccentuate(self)
  end
end
@@ -0,0 +1,79 @@
1
+ # encoding: UTF-8
2
+
3
module URLify

  # Maps single characters to their ASCII replacements. Most entries strip a
  # diacritic; a few expand ligatures and special letters into several ASCII
  # characters (for example 'Æ' => 'AE', 'ß' => 'ss', '&' => 'and').
  #
  # Every accented lowercase letter in the table has its uppercase counterpart
  # listed as well, so that capitalised input is transliterated rather than
  # left unmapped. The table is frozen because it is shared lookup data.
  ACCENTMAP = {
    'À' => 'A',
    'Á' => 'A',
    'Â' => 'A',
    'Ã' => 'A',
    'Ä' => 'A',
    'Å' => 'AA',
    'Æ' => 'AE',
    'Ç' => 'C',
    'È' => 'E',
    'É' => 'E',
    'Ê' => 'E',
    'Ë' => 'E',
    'Ì' => 'I',
    'Í' => 'I',
    'Î' => 'I',
    'Ï' => 'I',
    'Ð' => 'D',
    'Ł' => 'L',
    'Ñ' => 'N',
    'Ń' => 'N',
    'Ò' => 'O',
    'Ó' => 'O',
    'Ô' => 'O',
    'Õ' => 'O',
    'Ō' => 'O',
    'Ö' => 'O',
    'Ø' => 'OE',
    'Ś' => 'S',
    'Ù' => 'U',
    'Ú' => 'U',
    'Ü' => 'U',
    'Û' => 'U',
    'Ū' => 'U',
    'Ý' => 'Y',
    'Ÿ' => 'Y',
    'Þ' => 'Th',
    'Ż' => 'Z',
    'ß' => 'ss',
    'à' => 'a',
    'á' => 'a',
    'â' => 'a',
    'ã' => 'a',
    'ä' => 'a',
    'å' => 'aa',
    'æ' => 'ae',
    'ç' => 'c',
    'è' => 'e',
    'é' => 'e',
    'ê' => 'e',
    'ë' => 'e',
    'ì' => 'i',
    'í' => 'i',
    'î' => 'i',
    'ï' => 'i',
    'ð' => 'd',
    'ł' => 'l',
    'ñ' => 'n',
    'ń' => 'n',
    'ò' => 'o',
    'ó' => 'o',
    'ô' => 'o',
    'õ' => 'o',
    'ō' => 'o',
    'ö' => 'o',
    'ø' => 'oe',
    'ś' => 's',
    'ù' => 'u',
    'ú' => 'u',
    'û' => 'u',
    'ū' => 'u',
    'ü' => 'u',
    'ý' => 'y',
    'þ' => 'th',
    'ÿ' => 'y',
    'ż' => 'z',
    'Œ' => 'OE',
    'œ' => 'oe',
    '&' => 'and'
  }.freeze

end
@@ -0,0 +1 @@
1
+ *.gem
@@ -0,0 +1,44 @@
1
+ # encoding: UTF-8
2
+
3
+ class String
4
+ include URLify
5
+ end
6
+
7
+ class URLifyTest < Test::Unit::TestCase
8
+
9
+ def setup
10
+ @philosopher = "Søren Kierkegaard"
11
+ @biography = "Boyd: The Fighter Pilot Who Changed the Art of War"
12
+ end
13
+
14
+ def test_subtitle_stripping
15
+ assert_equal("Boyd", URLify.strip_subtitle(@biography))
16
+ end
17
+
18
+ def test_mixin_subtitle_stripping
19
+ assert_equal("Boyd", @biography.strip_subtitle)
20
+ end
21
+
22
+ def test_deaccentuation
23
+ assert_equal("Soeren Kierkegaard", URLify.deaccentuate(@philosopher))
24
+ assert_equal("Tomek Bartoszynski", URLify.deaccentuate("Tomek Bartoszyński"))
25
+ assert_equal("Jozef Maria Bochenski", URLify.deaccentuate("Józef Maria Bocheński"))
26
+ assert_equal("Jerzy Los", URLify.deaccentuate("Jerzy Łoś"))
27
+ assert_equal("Jan Lukasiewicz", URLify.deaccentuate("Jan Łukasiewicz"))
28
+ assert_equal("Chaim Perelman", URLify.deaccentuate("Chaïm Perelman"))
29
+ end
30
+
31
+ def test_mixin_deaccentuation
32
+ assert_equal("Soeren Kierkegaard", @philosopher.deaccentuate)
33
+ end
34
+
35
+ def test_urlification
36
+ assert_equal("soeren_kierkegaard", URLify.urlify(@philosopher))
37
+ assert_equal("boyd", URLify.urlify(@biography))
38
+ end
39
+
40
+ def test_mixin_urlification
41
+ assert_equal("soeren_kierkegaard", @philosopher.urlify)
42
+ assert_equal("boyd", @biography.urlify)
43
+ end
44
+ end
@@ -0,0 +1,46 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{urlify}
8
+ s.version = "0.2.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Benedict Eastaugh"]
12
+ s.date = %q{2009-10-09}
13
+ s.description = %q{A small library for converting accented characters to their ASCII equivalents.}
14
+ s.email = %q{benedict@eastaugh.net}
15
+ s.extra_rdoc_files = [
16
+ "README.md"
17
+ ]
18
+ s.files = [
19
+ "README.md",
20
+ "Rakefile",
21
+ "VERSION.yml",
22
+ "lib/urlify.rb",
23
+ "lib/urlify/accents.rb",
24
+ "pkg/.gitignore",
25
+ "test/urlify_test.rb",
26
+ "urlify.gemspec"
27
+ ]
28
+ s.homepage = %q{http://ionfish.github.com/urlify/}
29
+ s.rdoc_options = ["--charset=UTF-8"]
30
+ s.require_paths = ["lib"]
31
+ s.rubygems_version = %q{1.3.5}
32
+ s.summary = %q{Convert accented characters to their ASCII equivalents}
33
+ s.test_files = [
34
+ "test/urlify_test.rb"
35
+ ]
36
+
37
+ if s.respond_to? :specification_version then
38
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
39
+ s.specification_version = 3
40
+
41
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
42
+ else
43
+ end
44
+ else
45
+ end
46
+ end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: urlify
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Benedict Eastaugh
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-09 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: A small library for converting accented characters to their ASCII equivalents.
17
+ email: benedict@eastaugh.net
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.md
24
+ files:
25
+ - README.md
26
+ - Rakefile
27
+ - VERSION.yml
28
+ - lib/urlify.rb
29
+ - lib/urlify/accents.rb
30
+ - pkg/.gitignore
31
+ - test/urlify_test.rb
32
+ - urlify.gemspec
33
+ has_rdoc: true
34
+ homepage: http://ionfish.github.com/urlify/
35
+ licenses: []
36
+
37
+ post_install_message:
38
+ rdoc_options:
39
+ - --charset=UTF-8
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ version:
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ requirements: []
55
+
56
+ rubyforge_project:
57
+ rubygems_version: 1.3.5
58
+ signing_key:
59
+ specification_version: 3
60
+ summary: Convert accented characters to their ASCII equivalents
61
+ test_files:
62
+ - test/urlify_test.rb