kebab 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gemtest +0 -0
- data/Changelog.md +99 -0
- data/MIT-LICENSE +19 -0
- data/README.md +26 -0
- data/Rakefile +34 -0
- data/lib/kebab.rb +18 -0
- data/lib/kebab/identifier.rb +294 -0
- data/lib/kebab/transliterator/base.rb +110 -0
- data/lib/kebab/transliterator/bulgarian.rb +27 -0
- data/lib/kebab/transliterator/cyrillic.rb +108 -0
- data/lib/kebab/transliterator/danish.rb +15 -0
- data/lib/kebab/transliterator/german.rb +15 -0
- data/lib/kebab/transliterator/greek.rb +77 -0
- data/lib/kebab/transliterator/hindi.rb +137 -0
- data/lib/kebab/transliterator/latin.rb +199 -0
- data/lib/kebab/transliterator/macedonian.rb +29 -0
- data/lib/kebab/transliterator/norwegian.rb +14 -0
- data/lib/kebab/transliterator/romanian.rb +13 -0
- data/lib/kebab/transliterator/russian.rb +22 -0
- data/lib/kebab/transliterator/serbian.rb +34 -0
- data/lib/kebab/transliterator/spanish.rb +9 -0
- data/lib/kebab/transliterator/swedish.rb +16 -0
- data/lib/kebab/transliterator/turkish.rb +8 -0
- data/lib/kebab/transliterator/ukrainian.rb +30 -0
- data/lib/kebab/transliterator/vietnamese.rb +143 -0
- data/lib/kebab/utf8/active_support_proxy.rb +26 -0
- data/lib/kebab/utf8/dumb_proxy.rb +49 -0
- data/lib/kebab/utf8/java_proxy.rb +22 -0
- data/lib/kebab/utf8/mappings.rb +193 -0
- data/lib/kebab/utf8/proxy.rb +125 -0
- data/lib/kebab/utf8/unicode_proxy.rb +23 -0
- data/lib/kebab/version.rb +5 -0
- data/spec/kebab_spec.rb +155 -0
- data/spec/spec_helper.rb +45 -0
- data/spec/transliterators/base_spec.rb +16 -0
- data/spec/transliterators/bulgarian_spec.rb +20 -0
- data/spec/transliterators/danish_spec.rb +17 -0
- data/spec/transliterators/german_spec.rb +17 -0
- data/spec/transliterators/greek_spec.rb +17 -0
- data/spec/transliterators/hindi_spec.rb +17 -0
- data/spec/transliterators/latin_spec.rb +9 -0
- data/spec/transliterators/macedonian_spec.rb +9 -0
- data/spec/transliterators/norwegian_spec.rb +18 -0
- data/spec/transliterators/polish_spec.rb +14 -0
- data/spec/transliterators/romanian_spec.rb +19 -0
- data/spec/transliterators/russian_spec.rb +9 -0
- data/spec/transliterators/serbian_spec.rb +25 -0
- data/spec/transliterators/spanish_spec.rb +13 -0
- data/spec/transliterators/swedish_spec.rb +18 -0
- data/spec/transliterators/turkish_spec.rb +24 -0
- data/spec/transliterators/ukrainian_spec.rb +88 -0
- data/spec/transliterators/vietnamese_spec.rb +18 -0
- data/spec/utf8_proxy_spec.rb +53 -0
- metadata +167 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 87bf31a3fd0eae739657da5215e01e9997814e36fac547bdad9a3b82b4a7cbb5
|
4
|
+
data.tar.gz: ba4be2bf8b9d9f531c2e89cc378fb77bb46c2bafd1238938750d6ca703fd8e72
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fecfbf09cebc692a71693ccf8e1153135164f0dc353338dd262a2e1ce459fe706d95567bf1f7d65f9d0ef09d4c79df4c345c4c62572b2b4187686604ea5a39f0
|
7
|
+
data.tar.gz: 4ac136a1bdda3d4a7dfb986a75580c2b06882799aa8f6f7b20c8573ff3394e950c610ec9bafc0f8729b2b666bb4bda1cc5b73323730ccd57ef14fc3947e7b332
|
data/.gemtest
ADDED
File without changes
|
data/Changelog.md
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# Kebab Changelog
|
2
|
+
|
3
|
+
## 1.0.2
|
4
|
+
|
5
|
+
* Fix regression in ActiveSupport UTF8 proxy.
|
6
|
+
|
7
|
+
## 1.0.1
|
8
|
+
|
9
|
+
* Fix error with tidy_bytes on Rubinius.
|
10
|
+
* Simplify Active Support UTF8 proxy.
|
11
|
+
* Fix `allow_bangs` argument to to_ruby_method being silently ignored.
|
12
|
+
* Raise error when generating an impossible Ruby method name.
|
13
|
+
|
14
|
+
## 1.0.0
|
15
|
+
|
16
|
+
* Adopt semantic versioning.
|
17
|
+
* When using Active Support, require 3.2 or greater.
|
18
|
+
* Require Ruby 2.0 or greater.
|
19
|
+
* Fix Ruby warnings.
|
20
|
+
* Improve support for Ukrainian.
|
21
|
+
* Support some additional punctuation characters used by Chinese and others.
|
22
|
+
* Add Polish spec.
|
23
|
+
* Use native Unicode normalization on Ruby 2.2 in UTF8::DumbProxy.
|
24
|
+
* Invoke Ruby-native upcase/downcase in UTF8::DumbProxy.
|
25
|
+
* Proxy `tidy_bytes` method to Active Support when possible.
|
26
|
+
* Remove SlugString constant.
|
27
|
+
|
28
|
+
## 0.3.11
|
29
|
+
|
30
|
+
* Add support for Vietnamese.
|
31
|
+
|
32
|
+
## 0.3.10
|
33
|
+
|
34
|
+
* Fix Macedonian "S/S". Don't `include JRuby` unnecessarily.
|
35
|
+
|
36
|
+
## 0.3.9
|
37
|
+
|
38
|
+
* Add missing Greek vowels with diaeresis.
|
39
|
+
|
40
|
+
## 0.3.8
|
41
|
+
|
42
|
+
* Correct and improve Macedonian support.
|
43
|
+
|
44
|
+
## 0.3.7
|
45
|
+
|
46
|
+
* Fix compatibility with Ruby 1.8.7.
|
47
|
+
* Add Swedish support.
|
48
|
+
|
49
|
+
## 0.3.6
|
50
|
+
|
51
|
+
* Allow multiple transliterators.
|
52
|
+
* Add Greek support.
|
53
|
+
|
54
|
+
## 0.3.5
|
55
|
+
|
56
|
+
* Don't strip underscores from identifiers.
|
57
|
+
|
58
|
+
## 0.3.4
|
59
|
+
|
60
|
+
* Add Romanian support.
|
61
|
+
|
62
|
+
## 0.3.3
|
63
|
+
|
64
|
+
* Add Norwegian support.
|
65
|
+
|
66
|
+
## 0.3.2
|
67
|
+
|
68
|
+
* Improve Macedonian support.
|
69
|
+
|
70
|
+
## 0.3.1
|
71
|
+
|
72
|
+
* Small fixes to Cyrillic.
|
73
|
+
|
74
|
+
## 0.3.0
|
75
|
+
|
76
|
+
* Cyrillic support.
|
77
|
+
* Improve support for various Unicode spaces and dashes.
|
78
|
+
|
79
|
+
## 0.2.2
|
80
|
+
|
81
|
+
* Fix for "smart" quote handling.
|
82
|
+
|
83
|
+
## 0.2.1
|
84
|
+
|
85
|
+
* Implement #empty? for compatibility with Active Support's #blank?.
|
86
|
+
|
87
|
+
## 0.2.0
|
88
|
+
|
89
|
+
* Add support for Danish.
|
90
|
+
* Add method to generate Ruby identifiers.
|
91
|
+
* Improve performance.
|
92
|
+
|
93
|
+
## 0.1.1
|
94
|
+
|
95
|
+
* Add support for Serbian.
|
96
|
+
|
97
|
+
## 0.1.0
|
98
|
+
|
99
|
+
* Initial extraction from FriendlyId.
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2010 Norman Clarke
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Kebab
|
2
|
+
|
3
|
+
The original is [here](http://github.com/norman/kebab). You should probably
|
4
|
+
use that unless you hate monkey patching as much as I do.
|
5
|
+
|
6
|
+
## Copyright
|
7
|
+
|
8
|
+
Copyright (c) 2010-2013 Norman Clarke
|
9
|
+
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
11
|
+
this software and associated documentation files (the "Software"), to deal in
|
12
|
+
the Software without restriction, including without limitation the rights to
|
13
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
14
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
15
|
+
so, subject to the following conditions:
|
16
|
+
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
18
|
+
copies or substantial portions of the Software.
|
19
|
+
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
26
|
+
SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require "rubygems"
require "rake/testtask"
require "rake/clean"
require "rubygems/package_task"

# Plain `rake` and `rake test` both run the RSpec suite defined at the bottom.
task :default => :spec
task :test => :spec

# Generated directories removed by `rake clean`.
CLEAN << "pkg" << "doc" << "coverage" << ".yardoc"

# YARD is optional: the documentation task is only defined when the gem loads.
begin
  require "yard"
  YARD::Rake::YardocTask.new do |t|
    t.options = ["--output-dir=doc"]
  end
rescue LoadError
  # yard is not installed; skip the documentation task.
end

# Nothing is required while defining this task, so it needs no LoadError guard.
desc "Run SimpleCov"
task :coverage do
  # NOTE(review): presumably read by spec/spec_helper.rb to switch coverage on.
  ENV["COV"] = "true"
  Rake::Task["spec"].execute
end

# Packaging tasks are only defined when the gemspec sits next to this Rakefile.
gemspec = File.expand_path("../kebab.gemspec", __FILE__)
if File.exist?(gemspec)
  # Gem::Specification.load evaluates the gemspec with its path attached, so
  # errors point at the gemspec rather than at an anonymous eval string.
  Gem::PackageTask.new(Gem::Specification.load(gemspec)) { |pkg| }
end

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
|
data/lib/kebab.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Top-level namespace of the library. Besides the JRuby check it provides a
# String refinement, so string conversion helpers are only visible in files
# that opt in with `using Kebab`.
module Kebab
  # Reports whether the interpreter is JRuby 1.5 or newer.
  #
  # The major and minor components are compared as integers (a plain string
  # comparison would rank "1.10" below "1.5"), and a missing JRUBY_VERSION
  # constant is checked for explicitly instead of rescuing the NameError.
  #
  # @return [Boolean] false on any interpreter that does not define
  #   JRUBY_VERSION
  def self.jruby15?
    return false unless defined?(JRUBY_VERSION)

    major, minor = JRUBY_VERSION.split(".").first(2).map(&:to_i)
    ([major.to_i, minor.to_i] <=> [1, 5]) >= 0
  end

  refine String do
    # Wraps the receiver in a Kebab::Identifier.
    #
    # @return [Kebab::Identifier]
    def to_identifier
      Kebab::Identifier.new(self)
    end

    alias to_slug to_identifier
    alias skewer to_identifier
  end
end
|
15
|
+
|
16
|
+
require "kebab/transliterator/base"
|
17
|
+
require "kebab/utf8/proxy"
|
18
|
+
require "kebab/identifier"
|
@@ -0,0 +1,294 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Kebab
|
3
|
+
|
4
|
+
# Codepoints for characters that will be deleted by +#word_chars!+.
|
5
|
+
STRIPPABLE = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 19,
|
6
|
+
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39,
|
7
|
+
40, 41, 42, 43, 44, 45, 46, 47, 58, 59, 60, 61, 62, 63, 64, 91, 92, 93, 94,
|
8
|
+
96, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136,
|
9
|
+
137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151,
|
10
|
+
152, 153, 154, 155, 156, 157, 158, 159, 161, 162, 163, 164, 165, 166, 167,
|
11
|
+
168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 182, 183, 184,
|
12
|
+
185, 187, 188, 189, 190, 191, 215, 247, 8203, 8204, 8205, 8239, 65279]
|
13
|
+
|
14
|
+
# This class provides some string-manipulation methods specific to slugs.
|
15
|
+
#
|
16
|
+
# Note that this class includes many "bang methods" such as {#clean!} and
|
17
|
+
# {#normalize!} that perform actions on the string in-place. Each of these
|
18
|
+
# methods has a corresponding "bangless" method (i.e., +Identifier#clean!+
|
19
|
+
# and +Identifier#clean+) which does not appear in the documentation because
|
20
|
+
# it is generated dynamically.
|
21
|
+
#
|
22
|
+
# All of the bang methods return an instance of String, while the bangless
|
23
|
+
# versions return an instance of Kebab::Identifier, so that calls to methods
|
24
|
+
# specific to this class can be chained:
|
25
|
+
#
|
26
|
+
# string = Identifier.new("hello world")
|
27
|
+
# string.with_separators! # => "hello-world"
|
28
|
+
# string.with_separators # => <Kebab::Identifier:0x000001013e1590 @wrapped_string="hello-world">
|
29
|
+
#
|
30
|
+
# @see http://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=dec Unicode character table
|
31
|
+
class Identifier
|
32
|
+
using Kebab
|
33
|
+
|
34
|
+
Error = Class.new(StandardError)
|
35
|
+
|
36
|
+
attr_reader :wrapped_string
|
37
|
+
alias to_s wrapped_string
|
38
|
+
|
39
|
+
@@utf8_proxy = if Kebab.jruby15?
|
40
|
+
UTF8::JavaProxy
|
41
|
+
elsif defined? Unicode::VERSION
|
42
|
+
UTF8::UnicodeProxy
|
43
|
+
elsif defined? ActiveSupport
|
44
|
+
UTF8::ActiveSupportProxy
|
45
|
+
else
|
46
|
+
UTF8::DumbProxy
|
47
|
+
end
|
48
|
+
|
49
|
+
# Return the proxy used for UTF-8 support.
|
50
|
+
# @see Kebab::UTF8::Proxy
|
51
|
+
def self.utf8_proxy
|
52
|
+
@@utf8_proxy
|
53
|
+
end
|
54
|
+
|
55
|
+
# Set a proxy object used for UTF-8 support.
|
56
|
+
# @see Kebab::UTF8::Proxy
|
57
|
+
def self.utf8_proxy=(obj)
|
58
|
+
@@utf8_proxy = obj
|
59
|
+
end
|
60
|
+
|
61
|
+
def method_missing(symbol, *args, &block)
|
62
|
+
@wrapped_string.__send__(symbol, *args, &block)
|
63
|
+
end
|
64
|
+
|
65
|
+
# @param string [#to_s] The string to use as the basis of the Identifier.
|
66
|
+
def initialize(string)
|
67
|
+
@wrapped_string = string.to_s
|
68
|
+
tidy_bytes!
|
69
|
+
normalize_utf8!
|
70
|
+
end
|
71
|
+
|
72
|
+
def ==(value)
|
73
|
+
@wrapped_string.to_s == value.to_s
|
74
|
+
end
|
75
|
+
|
76
|
+
def eql?(value)
|
77
|
+
@wrapped_string == value
|
78
|
+
end
|
79
|
+
|
80
|
+
def empty?
|
81
|
+
# Defined explicitly so that this class responds to #empty?, for compatibility with Active Support's
|
82
|
+
# #blank?
|
83
|
+
@wrapped_string.empty?
|
84
|
+
end
|
85
|
+
|
86
|
+
# Approximate an ASCII string. This works only for Western strings using
|
87
|
+
# characters that are Roman-alphabet characters + diacritics. Non-letter
|
88
|
+
# characters are left unmodified.
|
89
|
+
#
|
90
|
+
# string = Identifier.new "Łódź, Poland"
|
91
|
+
# string.transliterate # => "Lodz, Poland"
|
92
|
+
# string = Identifier.new "日本"
|
93
|
+
# string.transliterate # => "日本"
|
94
|
+
#
|
95
|
+
# You can pass any key(s) from +Characters.approximations+ as arguments. This allows
|
96
|
+
# for contextual approximations. Various languages are supported, you can see which ones
|
97
|
+
# by looking at the source of {Kebab::Transliterator::Base}.
|
98
|
+
#
|
99
|
+
# string = Identifier.new "Jürgen Müller"
|
100
|
+
# string.transliterate # => "Jurgen Muller"
|
101
|
+
# string.transliterate :german # => "Juergen Mueller"
|
102
|
+
# string = Identifier.new "¡Feliz año!"
|
103
|
+
# string.transliterate # => "¡Feliz ano!"
|
104
|
+
# string.transliterate :spanish # => "¡Feliz anio!"
|
105
|
+
#
|
106
|
+
# The approximations are a hash, which you can modify if you choose:
|
107
|
+
#
|
108
|
+
# # Make Spanish use "nh" rather than "ni"
|
109
|
+
# Kebab::Transliterator::Spanish::APPROXIMATIONS["ñ"] = "nh"
|
110
|
+
#
|
111
|
+
# Notice that this method does not simply convert to ASCII; if you want
|
112
|
+
# to remove non-ASCII characters such as "¡" and "¿", use {#to_ascii!}:
|
113
|
+
#
|
114
|
+
# string.transliterate!(:spanish) # => "¡Feliz anio!"
|
115
|
+
# string.to_ascii! # => "Feliz anio!"
|
116
|
+
#
|
117
|
+
# @param kinds [Array<Symbol>] names of the transliterators to apply, in order (defaults to :latin)
|
118
|
+
# @return String
|
119
|
+
def transliterate!(*kinds)
|
120
|
+
kinds.compact!
|
121
|
+
kinds = [:latin] if kinds.empty?
|
122
|
+
kinds.each do |kind|
|
123
|
+
transliterator = Transliterator.get(kind).instance
|
124
|
+
@wrapped_string = transliterator.transliterate(@wrapped_string)
|
125
|
+
end
|
126
|
+
@wrapped_string
|
127
|
+
end
|
128
|
+
|
129
|
+
# Converts dashes to spaces, removes leading and trailing spaces, and
|
130
|
+
# collapses runs of consecutive spaces into a single space.
|
131
|
+
# @return String
|
132
|
+
def clean!
|
133
|
+
@wrapped_string = @wrapped_string.gsub("-", " ").squeeze(" ").strip
|
134
|
+
end
|
135
|
+
|
136
|
+
# Remove any non-word characters. For this library's purposes, this means
|
137
|
+
# anything other than letters, numbers, underscores, spaces, line feeds and carriage returns.
|
138
|
+
# @return String
|
139
|
+
def word_chars!
|
140
|
+
@wrapped_string = (unpack("U*") - Kebab::STRIPPABLE).pack("U*")
|
141
|
+
end
|
142
|
+
|
143
|
+
# Normalize the string for use as a URL slug. Note that in this context,
|
144
|
+
# +normalize+ means stripping, removing non-letters/numbers, downcasing,
|
145
|
+
# truncating to 255 bytes and converting whitespace to dashes.
|
146
|
+
# @param options [Hash, nil] overrides merged into {#default_normalize_options}
|
147
|
+
# @return String
|
148
|
+
def normalize!(options = nil)
|
149
|
+
options = default_normalize_options.merge(options || {})
|
150
|
+
|
151
|
+
if translit_option = options[:transliterate]
|
152
|
+
if translit_option != true
|
153
|
+
transliterate!(*translit_option)
|
154
|
+
else
|
155
|
+
transliterate!(*options[:transliterations])
|
156
|
+
end
|
157
|
+
end
|
158
|
+
to_ascii! if options[:to_ascii]
|
159
|
+
clean!
|
160
|
+
word_chars!
|
161
|
+
clean!
|
162
|
+
downcase!
|
163
|
+
truncate_bytes!(options[:max_length])
|
164
|
+
with_separators!(options[:separator])
|
165
|
+
end
|
166
|
+
|
167
|
+
# Normalize the string so that it can safely be used as a Ruby method name.
#
# The last character is treated separately from the rest of the string so
# that a trailing "!", "=" or "?" can survive. Everything before it is
# transliterated, reduced to ASCII, stripped of non-word characters and
# cleaned. Any whitespace left in the result is replaced with underscores.
#
# @param allow_bangs [Boolean] when true (the default) a trailing "!", "="
#   or "?" is kept; when false only a trailing letter or digit is kept.
# @return [String] the new wrapped string
# @raise [Error] if the result is empty or is nothing but a suffix character
def to_ruby_method!(allow_bangs = true)
  stripped = @wrapped_string.strip
  # Slice rather than match /\A(.+)(.)\z/: that pattern rejects one-character
  # and multi-line input, and the nil.to_s fallback it forced can be a frozen
  # string that the in-place downcase!/gsub! calls could not modify.
  leader  = stripped[0...-1].to_s
  trailer = stripped[-1].to_s.downcase

  # Characters outside this whitelist are dropped from the final position.
  rejected = allow_bangs ? /[^a-z0-9!=?]/ : /[^a-z0-9]/
  trailer  = trailer.gsub(rejected, "")

  id = leader.to_identifier
  id.transliterate!
  id.to_ascii!
  id.clean!
  id.word_chars!
  id.clean!

  @wrapped_string = id.to_s + trailer
  # Nothing at all, or a lone "!", "=" or "?", is not a usable method name.
  if @wrapped_string =~ /\A[!=?]?\z/
    raise Error, "Input generates impossible Ruby method name"
  end
  with_separators!("_")
end
|
191
|
+
|
192
|
+
# Delete any non-ascii characters.
|
193
|
+
# @return String
|
194
|
+
def to_ascii!
|
195
|
+
@wrapped_string = @wrapped_string.gsub(/[^\x00-\x7f]/u, '')
|
196
|
+
end
|
197
|
+
|
198
|
+
# Truncate the string to +max+ characters.
|
199
|
+
# @example
|
200
|
+
# "üéøá".to_identifier.truncate(3) #=> "üéø"
|
201
|
+
# @return String
|
202
|
+
def truncate!(max)
|
203
|
+
@wrapped_string = unpack("U*")[0...max].pack("U*")
|
204
|
+
end
|
205
|
+
|
206
|
+
# Truncate the string to +max+ bytes. This can be useful for ensuring that
|
207
|
+
# a UTF-8 string will always fit into a database column with a certain max
|
208
|
+
# byte length. The resulting string may be less than +max+ if the string must
|
209
|
+
# be truncated at a multibyte character boundary.
|
210
|
+
# @example
|
211
|
+
# "üéøá".to_identifier.truncate_bytes(3) #=> "ü"
|
212
|
+
# @return String
|
213
|
+
def truncate_bytes!(max)
|
214
|
+
return @wrapped_string if @wrapped_string.bytesize <= max
|
215
|
+
curr = 0
|
216
|
+
new = []
|
217
|
+
unpack("U*").each do |char|
|
218
|
+
break if curr > max
|
219
|
+
char = [char].pack("U")
|
220
|
+
curr += char.bytesize
|
221
|
+
if curr <= max
|
222
|
+
new << char
|
223
|
+
end
|
224
|
+
end
|
225
|
+
@wrapped_string = new.join
|
226
|
+
end
|
227
|
+
|
228
|
+
# Replaces each whitespace character with +char+ (a dash, "-", by default).
|
229
|
+
# @return String
|
230
|
+
def with_separators!(char = "-")
|
231
|
+
@wrapped_string = @wrapped_string.gsub(/\s/u, char)
|
232
|
+
end
|
233
|
+
|
234
|
+
# Perform UTF-8 sensitive upcasing.
|
235
|
+
# @return String
|
236
|
+
def upcase!
|
237
|
+
@wrapped_string = @@utf8_proxy.upcase(@wrapped_string)
|
238
|
+
end
|
239
|
+
|
240
|
+
# Perform UTF-8 sensitive downcasing.
|
241
|
+
# @return String
|
242
|
+
def downcase!
|
243
|
+
@wrapped_string = @@utf8_proxy.downcase(@wrapped_string)
|
244
|
+
end
|
245
|
+
|
246
|
+
# Perform Unicode composition on the wrapped string.
|
247
|
+
# @return String
|
248
|
+
def normalize_utf8!
|
249
|
+
@wrapped_string = @@utf8_proxy.normalize_utf8(@wrapped_string)
|
250
|
+
end
|
251
|
+
|
252
|
+
# Attempt to convert characters encoded using CP1252 and ISO-8859-1 to
|
253
|
+
# UTF-8.
|
254
|
+
# @return String
|
255
|
+
def tidy_bytes!
|
256
|
+
@wrapped_string = @@utf8_proxy.tidy_bytes(@wrapped_string)
|
257
|
+
end
|
258
|
+
|
259
|
+
%w[transliterate clean downcase word_chars normalize normalize_utf8
|
260
|
+
tidy_bytes to_ascii to_ruby_method truncate truncate_bytes upcase
|
261
|
+
with_separators].each do |method|
|
262
|
+
class_eval(<<-EOM, __FILE__, __LINE__ + 1)
|
263
|
+
def #{method}(*args)
|
264
|
+
send_to_new_instance(:#{method}!, *args)
|
265
|
+
end
|
266
|
+
EOM
|
267
|
+
end
|
268
|
+
|
269
|
+
def to_identifier
|
270
|
+
self
|
271
|
+
end
|
272
|
+
|
273
|
+
# The default options for {#normalize!}. Override to set your own defaults.
|
274
|
+
def default_normalize_options
|
275
|
+
{:transliterate => true, :max_length => 255, :separator => "-"}
|
276
|
+
end
|
277
|
+
|
278
|
+
alias approximate_ascii transliterate
|
279
|
+
alias approximate_ascii! transliterate!
|
280
|
+
alias with_dashes with_separators
|
281
|
+
alias with_dashes! with_separators!
|
282
|
+
alias to_slug to_identifier
|
283
|
+
|
284
|
+
private
|
285
|
+
|
286
|
+
# Used as the basis of the bangless methods.
|
287
|
+
def send_to_new_instance(*args)
|
288
|
+
id = Identifier.allocate
|
289
|
+
id.instance_variable_set :@wrapped_string, to_s
|
290
|
+
id.send(*args)
|
291
|
+
id
|
292
|
+
end
|
293
|
+
end
|
294
|
+
end
|