ionfish-urlify 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +69 -0
- data/Rakefile +32 -0
- data/VERSION.yml +4 -0
- data/lib/urlify.rb +66 -0
- data/lib/urlify/accents.rb +74 -0
- data/test/urlify_test.rb +39 -0
- data/urlify.gemspec +42 -0
- metadata +59 -0
data/README.md
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
URLify
|
2
|
+
======
|
3
|
+
|
4
|
+
A tiny library to convert diacritical marks to unaccented equivalents, for
|
5
|
+
ASCII-safe URI creation. It also includes a utility method to remove subtitles.
|
6
|
+
|
7
|
+
|
8
|
+
Installation
|
9
|
+
------------
|
10
|
+
|
11
|
+
sudo gem install ionfish-urlify
|
12
|
+
|
13
|
+
|
14
|
+
API
|
15
|
+
---
|
16
|
+
|
17
|
+
URLify.deaccentuate("Kurt Gödel") # => "Kurt Godel"
|
18
|
+
|
19
|
+
URLify.strip_subtitle "Begriffsschrift:
|
20
|
+
eine der arithmetischen nachgebildete
|
21
|
+
Formelsprache des reinen Denkens" # => "Begriffsschrift"
|
22
|
+
|
23
|
+
URLify.urlify "Über Sinn und Bedeutung" # => "uber_sinn_und_bedeutung"
|
24
|
+
|
25
|
+
URLify.urlify "Moses Schönfinkel", "-" # => "moses-schonfinkel"
|
26
|
+
|
27
|
+
The `URLify` module may be mixed into the `String` class to add the above class
|
28
|
+
methods--`deaccentuate`, `strip_subtitle` and `urlify`--as instance methods on
|
29
|
+
the `String` class. It is not mixed in by default, for obvious reasons.
|
30
|
+
|
31
|
+
class String
|
32
|
+
include URLify
|
33
|
+
end
|
34
|
+
|
35
|
+
"Grundzüge der theoretischen Logik".urlify
|
36
|
+
# => "grundzuge_der_theoretischen_logik"
|
37
|
+
|
38
|
+
Please note that `urlify` removes any character that is not an ASCII letter, a digit or
|
39
|
+
a separator (`deaccentuate` leaves unmapped characters untouched), and only characters in URLify's accent library will be
|
40
|
+
replaced by their ASCII counterparts. If the library doesn't include a
|
41
|
+
particular conversion, please consider forking the project and adding it.
|
42
|
+
|
43
|
+
|
44
|
+
Licence
|
45
|
+
-------
|
46
|
+
|
47
|
+
Copyright (c) 2009, Benedict Eastaugh. All rights reserved.
|
48
|
+
|
49
|
+
Redistribution and use in source and binary forms, with or without
|
50
|
+
modification, are permitted provided that the following conditions are met:
|
51
|
+
|
52
|
+
* Redistributions of source code must retain the above copyright notice, this
|
53
|
+
list of conditions and the following disclaimer.
|
54
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
55
|
+
this list of conditions and the following disclaimer in the documentation
|
56
|
+
and/or other materials provided with the distribution.
|
57
|
+
* The name of the author may not be used to endorse or promote products
|
58
|
+
derived from this software without specific prior written permission.
|
59
|
+
|
60
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
61
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
62
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
63
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
64
|
+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
65
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
66
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
67
|
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
68
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
69
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# Load the library relative to this Rakefile instead of relying on the current
# directory being in the load path: Ruby 1.9.2 and later no longer add "." to
# $LOAD_PATH, so a bare require 'lib/urlify' raises LoadError there.
require File.expand_path('../lib/urlify', __FILE__)

# Packaging tasks are optional. When Jeweler cannot be loaded, only print
# installation advice so that the test task below stays usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |s|
    s.name        = "urlify"
    s.summary     = "Diacritic conversion"
    s.email       = "benedict@eastaugh.net"
    s.homepage    = "http://ionfish.github.com/urlify/"
    s.description = "A small library for converting accented characters " +
                    "to their ASCII equivalents."
    s.authors     = ["Benedict Eastaugh"]
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install " +
       "technicalpickles-jeweler -s http://gems.github.com"
end

task :default => :test

desc "Run the URLify test suite"
task :test do
  require 'test/unit'

  testdir = "test"
  Dir.foreach(testdir) do |f|
    path = "#{testdir}/#{f}"
    # Escape the dot and anchor at the very end of the name so that only
    # regular files literally ending in "_test.rb" are loaded.
    if File.ftype(path) == "file" && File.basename(f).match(/_test\.rb\z/)
      load path
    end
  end
end
|
data/VERSION.yml
ADDED
data/lib/urlify.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module URLify
|
4
|
+
|
5
|
+
URLIFY_PATH = File.expand_path(File.dirname(__FILE__)) + '/urlify/'
|
6
|
+
require URLIFY_PATH + 'accents'
|
7
|
+
|
8
|
+
# Converts an input string into a URL-safe string.
|
9
|
+
#
|
10
|
+
# * Leading and trailing whitespace is removed.
|
11
|
+
# * Diacritics are removed from all characters.
|
12
|
+
# * All letters are converted to lower case.
|
13
|
+
# * Remaining whitespace is replaced with separators.
|
14
|
+
# * Any remaining character which is not a letter, a digit or a valid
|
15
|
+
# separator is removed.
|
16
|
+
#
|
17
|
+
# Only underscores, dashes, plus signs and the empty string are allowed as
|
18
|
+
# separators, although combinations are permitted, so "_", "--", "+_-" and ""
|
19
|
+
# are all valid separators.
|
20
|
+
def self.urlify(string, separator = "_")
  # Validate the whole separator with \A and \z. The line anchors ^ and $
  # would accept a multi-line value such as "x\n-" because one of its lines
  # is valid. Anything other than a run of "-", "_" and "+" (or the empty
  # string) falls back to the default underscore.
  separator = "_" unless separator =~ /\A[\-\_\+]*\z/

  # Guard against a missing title: input with nothing before the first colon
  # (for example "" or "   ") must produce "" rather than raise on nil.
  title = strip_subtitle(string.strip).to_s

  deaccentuate(title).
    downcase.
    gsub(/\s/, separator).
    gsub(/[^a-z\d\_\-\+]/, "")
end
|
30
|
+
|
31
|
+
# Removes everything from a string after the first colon.
|
32
|
+
#
|
33
|
+
# Ensures that titles with really long subtitles don't convert to equally
|
34
|
+
# long permalinks.
|
35
|
+
def self.strip_subtitle(string)
  # A limit of 2 stops splitting at the first colon, which is all that is
  # needed to isolate the title. String#split returns an empty array for "",
  # so fall back to an empty string instead of handing nil to callers.
  string.split(/\s*\:\s*/, 2).first || ""
end
|
38
|
+
|
39
|
+
# Removes diacritics from an input string's characters.
|
40
|
+
#
|
41
|
+
# So a lowercase 'u' with an umlaut, ü, becomes u, while an uppercase 'A'
|
42
|
+
# with an acute accent, Á, becomes A. This method is UTF-8 safe.
|
43
|
+
def self.deaccentuate(string)
  # Choose how to break the string into characters by interpreter version:
  # String#chars from 1.9.0 onwards, a UTF-8 regexp split before that.
  characters = if RUBY_VERSION >= "1.9.0"
    string.chars
  else
    string.split(//u)
  end

  # Swap each character for its ACCENTMAP replacement when one exists and
  # keep it unchanged otherwise, then stitch the pieces back together.
  replaced = characters.map do |character|
    ACCENTMAP[character] || character
  end
  replaced.join("")
end
|
48
|
+
|
49
|
+
# Instance method version of URLify.urlify, so that the library can be used
|
50
|
+
# as a mixin for the String class.
|
51
|
+
def urlify(separator = "_")
|
52
|
+
# Delegates to the module-level URLify.urlify, passing the receiver as the
# input string and forwarding the separator unchanged.
URLify.urlify(self, separator)
|
53
|
+
end
|
54
|
+
|
55
|
+
# Instance method version of URLify.strip_subtitle, so that the library can
|
56
|
+
# be used as a mixin for the String class.
|
57
|
+
def strip_subtitle
|
58
|
+
# Delegates to the module-level URLify.strip_subtitle with the receiver as
# the string to shorten.
URLify.strip_subtitle(self)
|
59
|
+
end
|
60
|
+
|
61
|
+
# Instance method version of URLify.deaccentuate, so that the library can be
|
62
|
+
# used as a mixin for the String class.
|
63
|
+
def deaccentuate
|
64
|
+
# Delegates to the module-level URLify.deaccentuate with the receiver as the
# string to convert.
URLify.deaccentuate(self)
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module URLify

  # Lookup table from a single accented or special character to its ASCII
  # replacement. Replacements may be longer than one character ('Å' => 'AA',
  # '&' => 'and').
  #
  # The capitals 'Ō', 'Ū' and 'Ÿ' are included alongside their lower-case
  # forms so that both cases of a supported letter are converted; without
  # them the capital has no replacement here.
  ACCENTMAP = {
    'À' => 'A',
    'Á' => 'A',
    'Â' => 'A',
    'Ã' => 'A',
    'Ä' => 'A',
    'Å' => 'AA',
    'Æ' => 'AE',
    'Ç' => 'C',
    'È' => 'E',
    'É' => 'E',
    'Ê' => 'E',
    'Ë' => 'E',
    'Ì' => 'I',
    'Í' => 'I',
    'Î' => 'I',
    'Ï' => 'I',
    'Ð' => 'D',
    'Ñ' => 'N',
    'Ò' => 'O',
    'Ó' => 'O',
    'Ô' => 'O',
    'Õ' => 'O',
    'Ō' => 'O',
    'Ö' => 'O',
    'Ø' => 'OE',
    'Ù' => 'U',
    'Ú' => 'U',
    'Ü' => 'U',
    'Û' => 'U',
    'Ū' => 'U',
    'Ý' => 'Y',
    'Ÿ' => 'Y',
    'Þ' => 'Th',
    'ß' => 'ss',
    'à' => 'a',
    'á' => 'a',
    'â' => 'a',
    'ã' => 'a',
    'ä' => 'a',
    'å' => 'aa',
    'æ' => 'ae',
    'ç' => 'c',
    'è' => 'e',
    'é' => 'e',
    'ê' => 'e',
    'ë' => 'e',
    'ì' => 'i',
    'í' => 'i',
    'î' => 'i',
    'ï' => 'i',
    'ð' => 'd',
    'ñ' => 'n',
    'ò' => 'o',
    'ó' => 'o',
    'ô' => 'o',
    'õ' => 'o',
    'ō' => 'o',
    'ö' => 'o',
    'ø' => 'oe',
    'ù' => 'u',
    'ú' => 'u',
    'û' => 'u',
    'ū' => 'u',
    'ü' => 'u',
    'ý' => 'y',
    'þ' => 'th',
    'ÿ' => 'y',
    'Œ' => 'OE',
    'œ' => 'oe',
    '&' => 'and'}

end
|
data/test/urlify_test.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Mix URLify into String so that the test cases in this file can call urlify,
# strip_subtitle and deaccentuate directly on string instances.
class String
|
4
|
+
include URLify
|
5
|
+
end
|
6
|
+
|
7
|
+
# Exercises each URLify module function together with its String mixin
# counterpart.
class URLifyTest < Test::Unit::TestCase

  # Fixtures: a name containing a mapped character and a title that carries a
  # subtitle after a colon.
  def setup
    @philosopher, @biography =
      "Søren Kierkegaard",
      "Boyd: The Fighter Pilot Who Changed the Art of War"
  end

  def test_subtitle_stripping
    stripped = URLify.strip_subtitle(@biography)
    assert_equal("Boyd", stripped)
  end

  def test_mixin_subtitle_stripping
    stripped = @biography.strip_subtitle
    assert_equal("Boyd", stripped)
  end

  def test_deaccentuation
    plain = URLify.deaccentuate(@philosopher)
    assert_equal("Soeren Kierkegaard", plain)
  end

  def test_mixin_deaccentuation
    plain = @philosopher.deaccentuate
    assert_equal("Soeren Kierkegaard", plain)
  end

  def test_urlification
    name_slug  = URLify.urlify(@philosopher)
    assert_equal("soeren_kierkegaard", name_slug)
    title_slug = URLify.urlify(@biography)
    assert_equal("boyd", title_slug)
  end

  def test_mixin_urlification
    name_slug  = @philosopher.urlify
    assert_equal("soeren_kierkegaard", name_slug)
    title_slug = @biography.urlify
    assert_equal("boyd", title_slug)
  end
end
|
data/urlify.gemspec
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for urlify 0.1.0.
# NOTE(review): presumably generated by the Jeweler tasks in the Rakefile;
# confirm before hand-editing, as regeneration would overwrite changes.
Gem::Specification.new do |s|
  s.name = %q{urlify}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Benedict Eastaugh"]
  s.date = %q{2009-06-29}
  s.description = %q{A small library for converting accented characters to their ASCII equivalents.}
  s.email = %q{benedict@eastaugh.net}
  s.extra_rdoc_files = [
    "README.md"
  ]
  s.files = [
    "README.md",
    "Rakefile",
    "VERSION.yml",
    "lib/urlify.rb",
    "lib/urlify/accents.rb",
    "test/urlify_test.rb",
    "urlify.gemspec"
  ]
  s.homepage = %q{http://ionfish.github.com/urlify/}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.4}
  s.summary = %q{Diacritic conversion}
  s.test_files = [
    "test/urlify_test.rb"
  ]

  # The previous version wrapped this assignment in a comparison of
  # Gem::RubyGemsVersion against 1.2.0 whose branches were both empty (the gem
  # declares no dependencies). The comparison is dropped: it had no effect, and
  # RubyGems 2.0+ no longer defines Gem::RubyGemsVersion, so merely loading the
  # file raised NameError there.
  s.specification_version = 3 if s.respond_to? :specification_version
end
|
metadata
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ionfish-urlify
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Benedict Eastaugh
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-06-29 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: A small library for converting accented characters to their ASCII equivalents.
|
17
|
+
email: benedict@eastaugh.net
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.md
|
24
|
+
files:
|
25
|
+
- README.md
|
26
|
+
- Rakefile
|
27
|
+
- VERSION.yml
|
28
|
+
- lib/urlify.rb
|
29
|
+
- lib/urlify/accents.rb
|
30
|
+
- test/urlify_test.rb
|
31
|
+
- urlify.gemspec
|
32
|
+
has_rdoc: false
|
33
|
+
homepage: http://ionfish.github.com/urlify/
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options:
|
36
|
+
- --charset=UTF-8
|
37
|
+
require_paths:
|
38
|
+
- lib
|
39
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: "0"
|
50
|
+
version:
|
51
|
+
requirements: []
|
52
|
+
|
53
|
+
rubyforge_project:
|
54
|
+
rubygems_version: 1.2.0
|
55
|
+
signing_key:
|
56
|
+
specification_version: 3
|
57
|
+
summary: Diacritic conversion
|
58
|
+
test_files:
|
59
|
+
- test/urlify_test.rb
|