kebab 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gemtest +0 -0
- data/Changelog.md +99 -0
- data/MIT-LICENSE +19 -0
- data/README.md +26 -0
- data/Rakefile +34 -0
- data/lib/kebab.rb +18 -0
- data/lib/kebab/identifier.rb +294 -0
- data/lib/kebab/transliterator/base.rb +110 -0
- data/lib/kebab/transliterator/bulgarian.rb +27 -0
- data/lib/kebab/transliterator/cyrillic.rb +108 -0
- data/lib/kebab/transliterator/danish.rb +15 -0
- data/lib/kebab/transliterator/german.rb +15 -0
- data/lib/kebab/transliterator/greek.rb +77 -0
- data/lib/kebab/transliterator/hindi.rb +137 -0
- data/lib/kebab/transliterator/latin.rb +199 -0
- data/lib/kebab/transliterator/macedonian.rb +29 -0
- data/lib/kebab/transliterator/norwegian.rb +14 -0
- data/lib/kebab/transliterator/romanian.rb +13 -0
- data/lib/kebab/transliterator/russian.rb +22 -0
- data/lib/kebab/transliterator/serbian.rb +34 -0
- data/lib/kebab/transliterator/spanish.rb +9 -0
- data/lib/kebab/transliterator/swedish.rb +16 -0
- data/lib/kebab/transliterator/turkish.rb +8 -0
- data/lib/kebab/transliterator/ukrainian.rb +30 -0
- data/lib/kebab/transliterator/vietnamese.rb +143 -0
- data/lib/kebab/utf8/active_support_proxy.rb +26 -0
- data/lib/kebab/utf8/dumb_proxy.rb +49 -0
- data/lib/kebab/utf8/java_proxy.rb +22 -0
- data/lib/kebab/utf8/mappings.rb +193 -0
- data/lib/kebab/utf8/proxy.rb +125 -0
- data/lib/kebab/utf8/unicode_proxy.rb +23 -0
- data/lib/kebab/version.rb +5 -0
- data/spec/kebab_spec.rb +155 -0
- data/spec/spec_helper.rb +45 -0
- data/spec/transliterators/base_spec.rb +16 -0
- data/spec/transliterators/bulgarian_spec.rb +20 -0
- data/spec/transliterators/danish_spec.rb +17 -0
- data/spec/transliterators/german_spec.rb +17 -0
- data/spec/transliterators/greek_spec.rb +17 -0
- data/spec/transliterators/hindi_spec.rb +17 -0
- data/spec/transliterators/latin_spec.rb +9 -0
- data/spec/transliterators/macedonian_spec.rb +9 -0
- data/spec/transliterators/norwegian_spec.rb +18 -0
- data/spec/transliterators/polish_spec.rb +14 -0
- data/spec/transliterators/romanian_spec.rb +19 -0
- data/spec/transliterators/russian_spec.rb +9 -0
- data/spec/transliterators/serbian_spec.rb +25 -0
- data/spec/transliterators/spanish_spec.rb +13 -0
- data/spec/transliterators/swedish_spec.rb +18 -0
- data/spec/transliterators/turkish_spec.rb +24 -0
- data/spec/transliterators/ukrainian_spec.rb +88 -0
- data/spec/transliterators/vietnamese_spec.rb +18 -0
- data/spec/utf8_proxy_spec.rb +53 -0
- metadata +167 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 87bf31a3fd0eae739657da5215e01e9997814e36fac547bdad9a3b82b4a7cbb5
|
4
|
+
data.tar.gz: ba4be2bf8b9d9f531c2e89cc378fb77bb46c2bafd1238938750d6ca703fd8e72
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fecfbf09cebc692a71693ccf8e1153135164f0dc353338dd262a2e1ce459fe706d95567bf1f7d65f9d0ef09d4c79df4c345c4c62572b2b4187686604ea5a39f0
|
7
|
+
data.tar.gz: 4ac136a1bdda3d4a7dfb986a75580c2b06882799aa8f6f7b20c8573ff3394e950c610ec9bafc0f8729b2b666bb4bda1cc5b73323730ccd57ef14fc3947e7b332
|
data/.gemtest
ADDED
File without changes
|
data/Changelog.md
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# Kebab Changelog
|
2
|
+
|
3
|
+
## 1.0.2
|
4
|
+
|
5
|
+
* Fix regression in ActiveSupport UTF8 proxy.
|
6
|
+
|
7
|
+
## 1.0.1
|
8
|
+
|
9
|
+
* Fix error with tidy_bytes on Rubinius.
|
10
|
+
* Simplify Active Support UTF8 proxy.
|
11
|
+
* Fix `allow_bangs` argument to to_ruby_method being silently ignored.
|
12
|
+
* Raise error when generating an impossible Ruby method name.
|
13
|
+
|
14
|
+
## 1.0.0
|
15
|
+
|
16
|
+
* Adopt semantic versioning.
|
17
|
+
* When using Active Support, require 3.2 or greater.
|
18
|
+
* Require Ruby 2.0 or greater.
|
19
|
+
* Fix Ruby warnings.
|
20
|
+
* Improve support for Ukrainian.
|
21
|
+
* Support some additional punctuation characters used by Chinese and others.
|
22
|
+
* Add Polish spec.
|
23
|
+
* Use native Unicode normalization on Ruby 2.2 in UTF8::DumbProxy.
|
24
|
+
* Invoke Ruby-native upcase/downcase in UTF8::DumbProxy.
|
25
|
+
* Proxy `tidy_bytes` method to Active Support when possible.
|
26
|
+
* Remove SlugString constant.
|
27
|
+
|
28
|
+
## 0.3.11
|
29
|
+
|
30
|
+
* Add support for Vietnamese.
|
31
|
+
|
32
|
+
## 0.3.10
|
33
|
+
|
34
|
+
* Fix Macedonian "S/S". Don't `include JRuby` unnecessarily.
|
35
|
+
|
36
|
+
## 0.3.9
|
37
|
+
|
38
|
+
* Add missing Greek vowels with diaeresis.
|
39
|
+
|
40
|
+
## 0.3.8
|
41
|
+
|
42
|
+
* Correct and improve Macedonian support.
|
43
|
+
|
44
|
+
## 0.3.7
|
45
|
+
|
46
|
+
* Fix compatibility with Ruby 1.8.7.
|
47
|
+
* Add Swedish support.
|
48
|
+
|
49
|
+
## 0.3.6
|
50
|
+
|
51
|
+
* Allow multiple transliterators.
|
52
|
+
* Add Greek support.
|
53
|
+
|
54
|
+
## 0.3.5
|
55
|
+
|
56
|
+
* Don't strip underscores from identifiers.
|
57
|
+
|
58
|
+
## 0.3.4
|
59
|
+
|
60
|
+
* Add Romanian support.
|
61
|
+
|
62
|
+
## 0.3.3
|
63
|
+
|
64
|
+
* Add Norwegian support.
|
65
|
+
|
66
|
+
## 0.3.2
|
67
|
+
|
68
|
+
* Improve Macedonian support.
|
69
|
+
|
70
|
+
## 0.3.1
|
71
|
+
|
72
|
+
* Small fixes to Cyrillic.
|
73
|
+
|
74
|
+
## 0.3.0
|
75
|
+
|
76
|
+
* Cyrillic support.
|
77
|
+
* Improve support for various Unicode spaces and dashes.
|
78
|
+
|
79
|
+
## 0.2.2
|
80
|
+
|
81
|
+
* Fix for "smart" quote handling.
|
82
|
+
|
83
|
+
## 0.2.1
|
84
|
+
|
85
|
+
* Implement #empty? for compatibility with Active Support's #blank?.
|
86
|
+
|
87
|
+
## 0.2.0
|
88
|
+
|
89
|
+
* Add support for Danish.
|
90
|
+
* Add method to generate Ruby identifiers.
|
91
|
+
* Improve performance.
|
92
|
+
|
93
|
+
## 0.1.1
|
94
|
+
|
95
|
+
* Add support for Serbian.
|
96
|
+
|
97
|
+
## 0.1.0
|
98
|
+
|
99
|
+
* Initial extraction from FriendlyId.
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2010 Norman Clarke
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Kebab
|
2
|
+
|
3
|
+
The original is [here](http://github.com/norman/kebab). You should probably
|
4
|
+
use that unless you hate monkey patching as much as I do.
|
5
|
+
|
6
|
+
## Copyright
|
7
|
+
|
8
|
+
Copyright (c) 2010-2013 Norman Clarke
|
9
|
+
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
11
|
+
this software and associated documentation files (the "Software"), to deal in
|
12
|
+
the Software without restriction, including without limitation the rights to
|
13
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
14
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
15
|
+
so, subject to the following conditions:
|
16
|
+
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
18
|
+
copies or substantial portions of the Software.
|
19
|
+
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
26
|
+
SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require "rubygems"
require "rake/testtask"
require "rake/clean"
require "rubygems/package_task"

task :default => :spec
task :test => :spec

CLEAN << "pkg" << "doc" << "coverage" << ".yardoc"

# YARD is optional: the documentation task is only defined when the gem loads.
begin
  require "yard"
  YARD::Rake::YardocTask.new do |t|
    t.options = ["--output-dir=doc"]
  end
rescue LoadError
end

# Nothing is required while defining this task, so it needs no LoadError guard.
desc "Run SimpleCov"
task :coverage do
  ENV["COV"] = "true"
  Rake::Task["spec"].execute
end

# Load the gemspec through RubyGems instead of eval-ing its text, so that
# __FILE__-relative paths inside the gemspec resolve against the gemspec itself.
gemspec = File.expand_path("../kebab.gemspec", __FILE__)
if File.exist? gemspec
  spec = Gem::Specification.load(gemspec)
  Gem::PackageTask.new(spec) { |pkg| } if spec
end

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
|
data/lib/kebab.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
module Kebab
  # Report whether the code is running on JRuby 1.5 or newer.
  #
  # The major and minor components are compared as integers, so "1.10" is
  # treated as newer than "1.5" (a plain String comparison gets this wrong).
  #
  # @return [Boolean] false when JRUBY_VERSION is not defined.
  def self.jruby15?
    return false unless defined?(JRUBY_VERSION)
    major_minor = JRUBY_VERSION.to_s.split(".").first(2).map(&:to_i)
    (major_minor <=> [1, 5]) >= 0
  end

  # Refinement that adds identifier conversion to String without
  # monkey patching it globally; activate it with `using Kebab`.
  refine String do
    # Wrap the receiver in a Kebab::Identifier.
    def to_identifier
      Kebab::Identifier.new self
    end

    alias to_slug to_identifier
    alias skewer to_identifier
  end
end
|
15
|
+
|
16
|
+
require "kebab/transliterator/base"
|
17
|
+
require "kebab/utf8/proxy"
|
18
|
+
require "kebab/identifier"
|
@@ -0,0 +1,294 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Kebab
|
3
|
+
|
4
|
+
# Codepoints for characters that will be deleted by +#word_chars!+.
# Written as ranges; the gaps (for example 10, 13, 32, 48-57, 65-90, 95 and
# 97-122) are the codepoints that are kept.
STRIPPABLE = [
  *0..9, 11, 12, *14..31,
  *33..47, *58..64, *91..94, 96,
  *123..159, *161..169, *171..180, *182..185, *187..191,
  215, 247,
  *8203..8205, 8239, 65279
]
|
13
|
+
|
14
|
+
# This class provides some string-manipulation methods specific to slugs.
|
15
|
+
#
|
16
|
+
# Note that this class includes many "bang methods" such as {#clean!} and
|
17
|
+
# {#normalize!} that perform actions on the string in-place. Each of these
|
18
|
+
# methods has a corresponding "bangless" method (i.e., +Identifier#clean!+
|
19
|
+
# and +Identifier#clean+) which does not appear in the documentation because
|
20
|
+
# it is generated dynamically.
|
21
|
+
#
|
22
|
+
# All of the bang methods return an instance of String, while the bangless
|
23
|
+
# versions return an instance of Kebab::Identifier, so that calls to methods
|
24
|
+
# specific to this class can be chained:
|
25
|
+
#
|
26
|
+
# string = Identifier.new("hello world")
|
27
|
+
# string.with_separators! # => "hello-world"
|
28
|
+
# string.with_separators # => <Kebab::Identifier:0x000001013e1590 @wrapped_string="hello-world">
|
29
|
+
#
|
30
|
+
# @see http://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=dec Unicode character table
|
31
|
+
class Identifier
|
32
|
+
using Kebab
|
33
|
+
|
34
|
+
Error = Class.new(StandardError)
|
35
|
+
|
36
|
+
attr_reader :wrapped_string
|
37
|
+
alias to_s wrapped_string
|
38
|
+
|
39
|
+
# Pick the UTF-8 proxy once, at class-definition time. The first matching
# condition wins, and only the chosen branch's constant is resolved.
@@utf8_proxy =
  if Kebab.jruby15? then UTF8::JavaProxy
  elsif defined?(Unicode::VERSION) then UTF8::UnicodeProxy
  elsif defined?(ActiveSupport) then UTF8::ActiveSupportProxy
  else UTF8::DumbProxy
  end
|
48
|
+
|
49
|
+
# Return the proxy used for UTF-8 support.
# @return the object currently stored in the @@utf8_proxy class variable
# @see Kebab::UTF8::Proxy
def self.utf8_proxy
  @@utf8_proxy
end
|
54
|
+
|
55
|
+
# Set a proxy object used for UTF-8 support.
# @param obj the proxy to store in the @@utf8_proxy class variable
# @see Kebab::UTF8::Proxy
def self.utf8_proxy=(obj)
  @@utf8_proxy = obj
end
|
60
|
+
|
61
|
+
# Forward any message this class does not define to the wrapped String,
# passing the arguments and block through unchanged. __send__ is used, so
# the wrapped string's private methods are reachable as well.
def method_missing(name, *arguments, &blk)
  @wrapped_string.__send__(name, *arguments, &blk)
end
|
64
|
+
|
65
|
+
# Wrap +string+ and immediately run {#tidy_bytes!} and {#normalize_utf8!}
# on it, in that order.
# @param string [#to_s] The string to use as the basis of the Identifier.
def initialize(string)
  @wrapped_string = string.to_s
  tidy_bytes!
  normalize_utf8!
end
|
71
|
+
|
72
|
+
# Compare by string form: both the wrapped string and +value+ are converted
# with #to_s before comparing, so any object whose #to_s matches is equal.
def ==(value)
  @wrapped_string.to_s == value.to_s
end
|
75
|
+
|
76
|
+
# Compare the wrapped string with +value+ using String#==. Unlike {#==},
# +value+ is not converted with #to_s first.
def eql?(value)
  @wrapped_string == value
end
|
79
|
+
|
80
|
+
# Whether the wrapped string is empty.
def empty?
  # Defined explicitly (rather than left to method_missing) so that this class
  # answers respond_to?(:empty?), for compatibility with Active Support's
  # #blank?
  @wrapped_string.empty?
end
|
85
|
+
|
86
|
+
# Approximate an ASCII string. This works only for Western strings using
# characters that are Roman-alphabet characters + diacritics. Non-letter
# characters are left unmodified.
#
#   string = Identifier.new "Łódź, Poland"
#   string.transliterate                 # => "Lodz, Poland"
#   string = Identifier.new "日本"
#   string.transliterate                 # => "日本"
#
# Each argument names a transliterator, looked up with +Transliterator.get+.
# The transliterators are applied one after another, in the order given,
# which allows for contextual approximations. With no arguments (or only
# nil arguments) +:latin+ is used. See the source of
# {Kebab::Transliterator::Base} for the supported languages.
#
#   string = Identifier.new "Jürgen Müller"
#   string.transliterate                 # => "Jurgen Muller"
#   string.transliterate :german         # => "Juergen Mueller"
#   string = Identifier.new "¡Feliz año!"
#   string.transliterate                 # => "¡Feliz ano!"
#   string.transliterate :spanish        # => "¡Feliz anio!"
#
# The approximation tables can be modified if you choose:
#
#   # Make Spanish use "nh" rather than "nn"
#   Kebab::Transliterator::Spanish::APPROXIMATIONS["ñ"] = "nh"
#
# Notice that this method does not simply convert to ASCII; characters such
# as "¡" and "¿" survive it. Use {#to_ascii!} to remove non-ASCII characters:
#
#   string.transliterate!(:spanish)       # => "¡Feliz anio!"
#   string.transliterate!                 # => "¡Feliz anio!"
#
# @param kinds [Array<Symbol>] transliterator names; nil entries are ignored
# @return String
def transliterate!(*kinds)
  requested = kinds.compact
  requested = [:latin] if requested.empty?
  requested.each do |kind|
    @wrapped_string = Transliterator.get(kind).instance.transliterate(@wrapped_string)
  end
  @wrapped_string
end
|
128
|
+
|
129
|
+
# Turn every dash into a space, collapse runs of spaces into a single
# space, and strip leading and trailing whitespace.
# @return String
def clean!
  spaced = @wrapped_string.tr("-", " ")
  @wrapped_string = spaced.squeeze(" ").strip
end
|
135
|
+
|
136
|
+
# Remove any non-word characters: every character whose codepoint is listed
# in Kebab::STRIPPABLE is deleted, everything else is kept in order.
# @return String
def word_chars!
  codepoints = @wrapped_string.unpack("U*")
  kept = codepoints - Kebab::STRIPPABLE
  @wrapped_string = kept.pack("U*")
end
|
142
|
+
|
143
|
+
# Normalize the string for use as a URL slug. The steps run in this order:
# optional transliteration, optional ASCII-only filtering, clean, strip
# non-word characters, clean again, downcase, truncate to +:max_length+
# bytes, and finally replace whitespace with +:separator+.
#
# +options+ is merged over {#default_normalize_options}. Recognized keys:
# * +:transliterate+ - false/nil to skip; true to use +:transliterations+;
#   any other value is passed (splatted) to {#transliterate!}
# * +:transliterations+ - kinds used when +:transliterate+ is true
# * +:to_ascii+ - when truthy, run {#to_ascii!}
# * +:max_length+ - byte limit passed to {#truncate_bytes!}
# * +:separator+ - passed to {#with_separators!}
#
# @param options [Hash, nil]
# @return String
def normalize!(options = nil)
  settings = default_normalize_options.merge(options || {})

  kinds = settings[:transliterate]
  if kinds
    kinds = settings[:transliterations] if kinds == true
    transliterate!(*kinds)
  end

  to_ascii! if settings[:to_ascii]
  clean!
  word_chars!
  clean!
  downcase!
  truncate_bytes!(settings[:max_length])
  with_separators!(settings[:separator])
end
|
166
|
+
|
167
|
+
# Normalize a string so that it can safely be used as a Ruby method name.
#
# The last character is handled separately from the rest: it is downcased
# and kept only if it is a lowercase letter or digit, or, when +allow_bangs+
# is true, one of "!", "=" or "?". Everything before it is transliterated,
# reduced to ASCII word characters and cleaned. Finally whitespace is
# replaced with underscores.
#
# @param allow_bangs [Boolean] whether a trailing "!", "=" or "?" is kept
# @return String
# @raise [Error] when nothing usable is left of the input
def to_ruby_method!(allow_bangs = true)
  # (.*) rather than (.+): a one-character name such as "a" has an empty leader.
  match = /\A(.*)(.)\z/.match(@wrapped_string.strip)
  leader, trailer = match ? match.captures : ["", ""]

  disallowed = allow_bangs ? /[^a-z0-9!=?]/ : /[^a-z0-9]/
  trailer = trailer.downcase.gsub(disallowed, "")

  id = Identifier.new(leader)
  id.transliterate!
  id.to_ascii!
  id.clean!
  id.word_chars!
  id.clean!

  @wrapped_string = id.to_s + trailer
  if @wrapped_string == ""
    raise Error, "Input generates impossible Ruby method name"
  end
  with_separators!("_")
end
|
191
|
+
|
192
|
+
# Delete any non-ASCII characters, i.e. every character outside the
# \x00-\x7f range.
# @return String
def to_ascii!
  @wrapped_string = @wrapped_string.gsub(/[^\x00-\x7f]/u, '')
end
|
197
|
+
|
198
|
+
# Truncate the string to +max+ characters (Unicode codepoints, not bytes).
# @example
#   "üéøá".to_identifier.truncate(3) #=> "üéø"
# @param max [Integer] number of characters to keep
# @return String
def truncate!(max)
  codepoints = @wrapped_string.unpack("U*")
  @wrapped_string = codepoints[0...max].pack("U*")
end
|
205
|
+
|
206
|
+
# Truncate the string to at most +max+ bytes without splitting a character.
# This can be useful for ensuring that a UTF-8 string always fits into a
# database column with a certain max byte length. The result may be shorter
# than +max+ bytes when the cut would otherwise fall inside a multibyte
# character.
# @example
#   "üéøá".to_identifier.truncate_bytes(3) #=> "ü"
# @param max [Integer] maximum size in bytes
# @return String
def truncate_bytes!(max)
  return @wrapped_string if @wrapped_string.bytesize <= max

  used = 0
  chars = @wrapped_string.unpack("U*").map { |codepoint| [codepoint].pack("U") }
  # The running total only grows, so the first character that overflows
  # +max+ ends the kept prefix.
  kept = chars.take_while do |char|
    used += char.bytesize
    used <= max
  end
  @wrapped_string = kept.join
end
|
227
|
+
|
228
|
+
# Replaces each whitespace character with +char+, a dash ("-") by default.
# @param char [String] the replacement for each whitespace character
# @return String
def with_separators!(char = "-")
  @wrapped_string = @wrapped_string.gsub(/\s/u, char)
end
|
233
|
+
|
234
|
+
# Perform UTF-8 sensitive upcasing by delegating to the configured
# UTF-8 proxy's +upcase+.
# @return String
def upcase!
  @wrapped_string = @@utf8_proxy.upcase(@wrapped_string)
end
|
239
|
+
|
240
|
+
# Perform UTF-8 sensitive downcasing by delegating to the configured
# UTF-8 proxy's +downcase+.
# @return String
def downcase!
  @wrapped_string = @@utf8_proxy.downcase(@wrapped_string)
end
|
245
|
+
|
246
|
+
# Perform Unicode composition on the wrapped string by delegating to the
# configured UTF-8 proxy's +normalize_utf8+.
# @return String
def normalize_utf8!
  @wrapped_string = @@utf8_proxy.normalize_utf8(@wrapped_string)
end
|
251
|
+
|
252
|
+
# Attempt to convert characters encoded using CP1252 and ISO-8859-1 to
# UTF-8, by delegating to the configured UTF-8 proxy's +tidy_bytes+.
# @return String
def tidy_bytes!
  @wrapped_string = @@utf8_proxy.tidy_bytes(@wrapped_string)
end
|
258
|
+
|
259
|
+
# Generate the "bangless" counterpart of each bang method. The bangless
# version runs the bang method on a fresh Identifier wrapping the same
# string and returns that Identifier, leaving the receiver untouched.
%w[
  transliterate clean downcase word_chars normalize normalize_utf8
  tidy_bytes to_ascii to_ruby_method truncate truncate_bytes upcase
  with_separators
].each do |name|
  bang_name = :"#{name}!"
  define_method(name) do |*args|
    send_to_new_instance(bang_name, *args)
  end
end
|
268
|
+
|
269
|
+
# An Identifier is already an identifier, so conversion returns the receiver
# itself (not a copy).
def to_identifier
  self
end
|
272
|
+
|
273
|
+
# The default options for {#normalize!}: transliteration on, a 255-byte
# limit and "-" as the separator. Override to set your own defaults.
# @return [Hash] a new Hash on every call
def default_normalize_options
  {
    transliterate: true,
    max_length: 255,
    separator: "-"
  }
end
|
277
|
+
|
278
|
+
alias approximate_ascii transliterate
|
279
|
+
alias approximate_ascii! transliterate!
|
280
|
+
alias with_dashes with_separators
|
281
|
+
alias with_dashes! with_separators!
|
282
|
+
alias to_slug to_identifier
|
283
|
+
|
284
|
+
private
|
285
|
+
|
286
|
+
# Used as the basis of the bangless methods: build a new Identifier around
# this object's string without running #initialize, send it the given
# message (method name followed by its arguments), and return the new
# Identifier rather than the message's result.
def send_to_new_instance(*args)
  Identifier.allocate.tap do |copy|
    copy.instance_variable_set :@wrapped_string, to_s
    copy.send(*args)
  end
end
|
293
|
+
end
|
294
|
+
end
|