urlify 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +74 -0
- data/Rakefile +32 -0
- data/VERSION.yml +4 -0
- data/lib/urlify.rb +66 -0
- data/lib/urlify/accents.rb +79 -0
- data/pkg/.gitignore +1 -0
- data/test/urlify_test.rb +44 -0
- data/urlify.gemspec +46 -0
- metadata +62 -0
data/README.md
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
URLify
|
2
|
+
======
|
3
|
+
|
4
|
+
A tiny library to convert diacritical marks to unaccented equivalents, for
|
5
|
+
ASCII-safe URI creation. It also includes a utility method to remove subtitles.
|
6
|
+
|
7
|
+
|
8
|
+
Installation
|
9
|
+
------------
|
10
|
+
|
11
|
+
sudo gem install urlify
|
12
|
+
|
13
|
+
URLify is available from [Gemcutter][gc] and in source form on [GitHub][gh].
|
14
|
+
|
15
|
+
[gc]: http://gemcutter.org/gems/urlify
|
16
|
+
[gh]: http://github.com/ionfish/urlify
|
17
|
+
|
18
|
+
|
19
|
+
API
|
20
|
+
---
|
21
|
+
|
22
|
+
URLify.deaccentuate "Kurt Gödel" # => "Kurt Godel"
|
23
|
+
|
24
|
+
URLify.strip_subtitle "Begriffsschrift:
|
25
|
+
eine der arithmetischen nachgebildete
|
26
|
+
Formelsprache des reinen Denkens" # => "Begriffsschrift"
|
27
|
+
|
28
|
+
URLify.urlify "Über Sinn und Bedeutung" # => "uber_sinn_und_bedeutung"
|
29
|
+
|
30
|
+
URLify.urlify "Moses Schönfinkel", "-" # => "moses-schonfinkel"
|
31
|
+
|
32
|
+
The `URLify` module may be mixed into the `String` class to add the above class
|
33
|
+
methods--`deaccentuate`, `strip_subtitle` and `urlify`--as instance methods on
|
34
|
+
the `String` class. It is not mixed in by default, for obvious reasons.
|
35
|
+
|
36
|
+
class String
|
37
|
+
include URLify
|
38
|
+
end
|
39
|
+
|
40
|
+
"Grundzüge der theoretischen Logik".urlify
|
41
|
+
# => "grundzuge_der_theoretischen_logik"
|
42
|
+
|
43
|
+
Please note that `urlify` removes any character that is not an ASCII letter, a
|
44
|
+
digit or a separator, and only characters in URLify's accent library will be
|
45
|
+
replaced by their ASCII counterparts. If the library doesn't include a
|
46
|
+
particular conversion, please consider forking the project and adding it.
|
47
|
+
|
48
|
+
|
49
|
+
Licence
|
50
|
+
-------
|
51
|
+
|
52
|
+
Copyright (c) 2009, Benedict Eastaugh. All rights reserved.
|
53
|
+
|
54
|
+
Redistribution and use in source and binary forms, with or without
|
55
|
+
modification, are permitted provided that the following conditions are met:
|
56
|
+
|
57
|
+
* Redistributions of source code must retain the above copyright notice, this
|
58
|
+
list of conditions and the following disclaimer.
|
59
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
60
|
+
this list of conditions and the following disclaimer in the documentation
|
61
|
+
and/or other materials provided with the distribution.
|
62
|
+
* The name of the author may not be used to endorse or promote products
|
63
|
+
derived from this software without specific prior written permission.
|
64
|
+
|
65
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
66
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
67
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
68
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
69
|
+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
70
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
71
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
72
|
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
73
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
74
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'lib/urlify'
|
2
|
+
|
3
|
+
begin
|
4
|
+
require 'jeweler'
|
5
|
+
Jeweler::Tasks.new do |s|
|
6
|
+
s.name = "urlify"
|
7
|
+
s.summary = "Convert accented characters to their ASCII equivalents"
|
8
|
+
s.email = "benedict@eastaugh.net"
|
9
|
+
s.homepage = "http://ionfish.github.com/urlify/"
|
10
|
+
s.description = "A small library for converting accented characters " +
|
11
|
+
"to their ASCII equivalents."
|
12
|
+
s.authors = ["Benedict Eastaugh"]
|
13
|
+
end
|
14
|
+
rescue LoadError
|
15
|
+
puts "Jeweler not available. Install it with: sudo gem install " +
|
16
|
+
"technicalpickles-jeweler -s http://gems.github.com"
|
17
|
+
end
|
18
|
+
|
19
|
+
task :default => :test
|
20
|
+
|
21
|
+
desc "Run the URLify test suite"
|
22
|
+
task :test do
|
23
|
+
require 'test/unit'
|
24
|
+
|
25
|
+
testdir = "test"
|
26
|
+
Dir.foreach(testdir) do |f|
|
27
|
+
path = "#{testdir}/#{f}"
|
28
|
+
if File.ftype(path) == "file" && File.basename(f).match(/_test.rb$/)
|
29
|
+
load path
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/VERSION.yml
ADDED
data/lib/urlify.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module URLify
|
4
|
+
|
5
|
+
URLIFY_PATH = File.expand_path(File.dirname(__FILE__)) + '/urlify/'
|
6
|
+
require URLIFY_PATH + 'accents'
|
7
|
+
|
8
|
+
# Converts an input string into a URL-safe string.
|
9
|
+
#
|
10
|
+
# * Leading and trailing whitespace is removed.
|
11
|
+
# * Diacritics are removed from all characters.
|
12
|
+
# * All letters are converted to lower case.
|
13
|
+
# * Remaining whitespace is replaced with separators.
|
14
|
+
# * Any remaining character which is not a letter, a digit or a valid
|
15
|
+
# separator is removed.
|
16
|
+
#
|
17
|
+
# Only underscores, dashes, plus signs and the empty string are allowed as
|
18
|
+
# separators, although combinations are permitted, so "_", "--", "+_-" and ""
|
19
|
+
# are all valid separators.
|
20
|
+
def self.urlify(string, separator = "_")
|
21
|
+
unless separator =~ /^[\-\_\+]*$/
|
22
|
+
separator = "_"
|
23
|
+
end
|
24
|
+
|
25
|
+
deaccentuate(strip_subtitle(string.strip)).
|
26
|
+
downcase.
|
27
|
+
gsub(/\s/, separator).
|
28
|
+
gsub(/[^a-z\d\_\-\+]/, "")
|
29
|
+
end
|
30
|
+
|
31
|
+
# Removes everything from a string after the first colon.
|
32
|
+
#
|
33
|
+
# Ensures that titles with really long subtitles don't convert to equally
|
34
|
+
# long permalinks.
|
35
|
+
def self.strip_subtitle(string)
|
36
|
+
string.split(/\s*\:\s*/).first
|
37
|
+
end
|
38
|
+
|
39
|
+
# Removes diacritics from an input string's characters.
|
40
|
+
#
|
41
|
+
# So a lowercase 'u' with an umlaut, ü, becomes u, while an uppercase 'A'
|
42
|
+
# with an acute accent, Á, becomes A. This method is UTF-8 safe.
|
43
|
+
def self.deaccentuate(string)
|
44
|
+
(RUBY_VERSION >= "1.9.0" ? string.chars : string.split(//u)).map {|c|
|
45
|
+
ACCENTMAP[c] || c
|
46
|
+
}.join("")
|
47
|
+
end
|
48
|
+
|
49
|
+
# Instance method version of URLify.urlify, so that the library can be used
|
50
|
+
# as a mixin for the String class.
|
51
|
+
def urlify(separator = "_")
|
52
|
+
URLify.urlify(self, separator)
|
53
|
+
end
|
54
|
+
|
55
|
+
# Instance method version of URLify.strip_subtitle, so that the library can
|
56
|
+
# be used as a mixin for the String class.
|
57
|
+
def strip_subtitle
|
58
|
+
URLify.strip_subtitle(self)
|
59
|
+
end
|
60
|
+
|
61
|
+
# Instance method version of URLify.deaccentuate, so that the library can be
|
62
|
+
# used as a mixin for the String class.
|
63
|
+
def deaccentuate
|
64
|
+
URLify.deaccentuate(self)
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module URLify
|
4
|
+
|
5
|
+
ACCENTMAP = {
|
6
|
+
'À' => 'A',
|
7
|
+
'Á' => 'A',
|
8
|
+
'Â' => 'A',
|
9
|
+
'Ã' => 'A',
|
10
|
+
'Ä' => 'A',
|
11
|
+
'Å' => 'AA',
|
12
|
+
'Æ' => 'AE',
|
13
|
+
'Ç' => 'C',
|
14
|
+
'È' => 'E',
|
15
|
+
'É' => 'E',
|
16
|
+
'Ê' => 'E',
|
17
|
+
'Ë' => 'E',
|
18
|
+
'Ì' => 'I',
|
19
|
+
'Í' => 'I',
|
20
|
+
'Î' => 'I',
|
21
|
+
'Ï' => 'I',
|
22
|
+
'Ð' => 'D',
|
23
|
+
'Ł' => 'L',
|
24
|
+
'Ñ' => 'N',
|
25
|
+
'Ò' => 'O',
|
26
|
+
'Ó' => 'O',
|
27
|
+
'Ô' => 'O',
|
28
|
+
'Õ' => 'O',
|
29
|
+
'Ö' => 'O',
|
30
|
+
'Ø' => 'OE',
|
31
|
+
'Ù' => 'U',
|
32
|
+
'Ú' => 'U',
|
33
|
+
'Ü' => 'U',
|
34
|
+
'Û' => 'U',
|
35
|
+
'Ý' => 'Y',
|
36
|
+
'Þ' => 'Th',
|
37
|
+
'ß' => 'ss',
|
38
|
+
'à' => 'a',
|
39
|
+
'á' => 'a',
|
40
|
+
'â' => 'a',
|
41
|
+
'ã' => 'a',
|
42
|
+
'ä' => 'a',
|
43
|
+
'å' => 'aa',
|
44
|
+
'æ' => 'ae',
|
45
|
+
'ç' => 'c',
|
46
|
+
'è' => 'e',
|
47
|
+
'é' => 'e',
|
48
|
+
'ê' => 'e',
|
49
|
+
'ë' => 'e',
|
50
|
+
'ì' => 'i',
|
51
|
+
'í' => 'i',
|
52
|
+
'î' => 'i',
|
53
|
+
'ï' => 'i',
|
54
|
+
'ð' => 'd',
|
55
|
+
'ł' => 'l',
|
56
|
+
'ñ' => 'n',
|
57
|
+
'ń' => 'n',
|
58
|
+
'ò' => 'o',
|
59
|
+
'ó' => 'o',
|
60
|
+
'ô' => 'o',
|
61
|
+
'õ' => 'o',
|
62
|
+
'ō' => 'o',
|
63
|
+
'ö' => 'o',
|
64
|
+
'ø' => 'oe',
|
65
|
+
'ś' => 's',
|
66
|
+
'ù' => 'u',
|
67
|
+
'ú' => 'u',
|
68
|
+
'û' => 'u',
|
69
|
+
'ū' => 'u',
|
70
|
+
'ü' => 'u',
|
71
|
+
'ý' => 'y',
|
72
|
+
'þ' => 'th',
|
73
|
+
'ÿ' => 'y',
|
74
|
+
'ż' => 'z',
|
75
|
+
'Œ' => 'OE',
|
76
|
+
'œ' => 'oe',
|
77
|
+
'&' => 'and'}
|
78
|
+
|
79
|
+
end
|
data/pkg/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
*.gem
|
data/test/urlify_test.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
class String
|
4
|
+
include URLify
|
5
|
+
end
|
6
|
+
|
7
|
+
class URLifyTest < Test::Unit::TestCase
|
8
|
+
|
9
|
+
def setup
|
10
|
+
@philosopher = "Søren Kierkegaard"
|
11
|
+
@biography = "Boyd: The Fighter Pilot Who Changed the Art of War"
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_subtitle_stripping
|
15
|
+
assert_equal("Boyd", URLify.strip_subtitle(@biography))
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_mixin_subtitle_stripping
|
19
|
+
assert_equal("Boyd", @biography.strip_subtitle)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_deaccentuation
|
23
|
+
assert_equal("Soeren Kierkegaard", URLify.deaccentuate(@philosopher))
|
24
|
+
assert_equal("Tomek Bartoszynski", URLify.deaccentuate("Tomek Bartoszyński"))
|
25
|
+
assert_equal("Jozef Maria Bochenski", URLify.deaccentuate("Józef Maria Bocheński"))
|
26
|
+
assert_equal("Jerzy Los", URLify.deaccentuate("Jerzy Łoś"))
|
27
|
+
assert_equal("Jan Lukasiewicz", URLify.deaccentuate("Jan Łukasiewicz"))
|
28
|
+
assert_equal("Chaim Perelman", URLify.deaccentuate("Chaïm Perelman"))
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_mixin_deaccentuation
|
32
|
+
assert_equal("Soeren Kierkegaard", @philosopher.deaccentuate)
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_urlification
|
36
|
+
assert_equal("soeren_kierkegaard", URLify.urlify(@philosopher))
|
37
|
+
assert_equal("boyd", URLify.urlify(@biography))
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_mixin_urlification
|
41
|
+
assert_equal("soeren_kierkegaard", @philosopher.urlify)
|
42
|
+
assert_equal("boyd", @biography.urlify)
|
43
|
+
end
|
44
|
+
end
|
data/urlify.gemspec
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{urlify}
|
8
|
+
s.version = "0.2.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Benedict Eastaugh"]
|
12
|
+
s.date = %q{2009-10-09}
|
13
|
+
s.description = %q{A small library for converting accented characters to their ASCII equivalents.}
|
14
|
+
s.email = %q{benedict@eastaugh.net}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"README.md"
|
17
|
+
]
|
18
|
+
s.files = [
|
19
|
+
"README.md",
|
20
|
+
"Rakefile",
|
21
|
+
"VERSION.yml",
|
22
|
+
"lib/urlify.rb",
|
23
|
+
"lib/urlify/accents.rb",
|
24
|
+
"pkg/.gitignore",
|
25
|
+
"test/urlify_test.rb",
|
26
|
+
"urlify.gemspec"
|
27
|
+
]
|
28
|
+
s.homepage = %q{http://ionfish.github.com/urlify/}
|
29
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
30
|
+
s.require_paths = ["lib"]
|
31
|
+
s.rubygems_version = %q{1.3.5}
|
32
|
+
s.summary = %q{Convert accented characters to their ASCII equivalents}
|
33
|
+
s.test_files = [
|
34
|
+
"test/urlify_test.rb"
|
35
|
+
]
|
36
|
+
|
37
|
+
if s.respond_to? :specification_version then
|
38
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
39
|
+
s.specification_version = 3
|
40
|
+
|
41
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
42
|
+
else
|
43
|
+
end
|
44
|
+
else
|
45
|
+
end
|
46
|
+
end
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: urlify
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Benedict Eastaugh
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-09 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: A small library for converting accented characters to their ASCII equivalents.
|
17
|
+
email: benedict@eastaugh.net
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.md
|
24
|
+
files:
|
25
|
+
- README.md
|
26
|
+
- Rakefile
|
27
|
+
- VERSION.yml
|
28
|
+
- lib/urlify.rb
|
29
|
+
- lib/urlify/accents.rb
|
30
|
+
- pkg/.gitignore
|
31
|
+
- test/urlify_test.rb
|
32
|
+
- urlify.gemspec
|
33
|
+
has_rdoc: true
|
34
|
+
homepage: http://ionfish.github.com/urlify/
|
35
|
+
licenses: []
|
36
|
+
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options:
|
39
|
+
- --charset=UTF-8
|
40
|
+
require_paths:
|
41
|
+
- lib
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: "0"
|
47
|
+
version:
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: "0"
|
53
|
+
version:
|
54
|
+
requirements: []
|
55
|
+
|
56
|
+
rubyforge_project:
|
57
|
+
rubygems_version: 1.3.5
|
58
|
+
signing_key:
|
59
|
+
specification_version: 3
|
60
|
+
summary: Convert accented characters to their ASCII equivalents
|
61
|
+
test_files:
|
62
|
+
- test/urlify_test.rb
|