stringex 1.5.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +74 -0
- data/README.rdoc +22 -1
- data/Rakefile +46 -223
- data/VERSION +1 -0
- data/init.rb +1 -0
- data/lib/stringex.rb +11 -3
- data/lib/stringex/acts_as_url.rb +49 -97
- data/lib/stringex/acts_as_url/adapter.rb +26 -0
- data/lib/stringex/acts_as_url/adapter/active_record.rb +23 -0
- data/lib/stringex/acts_as_url/adapter/base.rb +188 -0
- data/lib/stringex/acts_as_url/adapter/data_mapper.rb +67 -0
- data/lib/stringex/acts_as_url/adapter/mongoid.rb +36 -0
- data/lib/stringex/configuration.rb +4 -0
- data/lib/stringex/configuration/acts_as_url.rb +44 -0
- data/lib/stringex/configuration/base.rb +58 -0
- data/lib/stringex/configuration/configurator.rb +25 -0
- data/lib/stringex/configuration/string_extensions.rb +19 -0
- data/lib/stringex/localization.rb +98 -0
- data/lib/stringex/localization/backend/i18n.rb +53 -0
- data/lib/stringex/localization/backend/internal.rb +51 -0
- data/lib/stringex/localization/conversion_expressions.rb +148 -0
- data/lib/stringex/localization/converter.rb +121 -0
- data/lib/stringex/localization/default_conversions.rb +88 -0
- data/lib/stringex/rails/railtie.rb +10 -0
- data/lib/stringex/string_extensions.rb +153 -208
- data/lib/stringex/unidecoder.rb +6 -101
- data/lib/stringex/unidecoder_data/x00.yml +1 -1
- data/lib/stringex/unidecoder_data/x02.yml +5 -5
- data/lib/stringex/unidecoder_data/x05.yml +1 -1
- data/lib/stringex/unidecoder_data/x06.yml +1 -1
- data/lib/stringex/unidecoder_data/x07.yml +3 -3
- data/lib/stringex/unidecoder_data/x09.yml +1 -1
- data/lib/stringex/unidecoder_data/x0e.yml +2 -2
- data/lib/stringex/unidecoder_data/x1f.yml +2 -2
- data/lib/stringex/unidecoder_data/x20.yml +1 -1
- data/lib/stringex/unidecoder_data/xfb.yml +1 -1
- data/lib/stringex/unidecoder_data/xff.yml +1 -1
- data/lib/stringex/version.rb +8 -0
- data/locales/da.yml +73 -0
- data/locales/en.yml +66 -0
- data/stringex.gemspec +77 -18
- data/test/acts_as_url/adapter/active_record.rb +72 -0
- data/test/acts_as_url/adapter/data_mapper.rb +82 -0
- data/test/acts_as_url/adapter/mongoid.rb +73 -0
- data/test/acts_as_url_configuration_test.rb +51 -0
- data/test/acts_as_url_integration_test.rb +271 -0
- data/test/localization/da_test.rb +117 -0
- data/test/localization/default_test.rb +113 -0
- data/test/localization/en_test.rb +117 -0
- data/test/localization_test.rb +123 -0
- data/test/redcloth_to_html_test.rb +37 -0
- data/test/string_extensions_test.rb +59 -91
- data/test/test_helper.rb +2 -0
- data/test/unicode_point_suite/basic_greek_test.rb +113 -0
- data/test/unicode_point_suite/basic_latin_test.rb +142 -0
- data/test/unicode_point_suite/codepoint_test_helper.rb +32 -0
- data/test/unidecoder/bad_localization.yml +1 -0
- data/test/unidecoder/localization.yml +4 -0
- data/test/unidecoder_test.rb +3 -5
- metadata +145 -37
- data/test/acts_as_url_test.rb +0 -272
data/lib/stringex/unidecoder.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require "yaml"
|
3
2
|
|
4
3
|
module Stringex
|
5
4
|
module Unidecoder
|
@@ -7,15 +6,14 @@ module Stringex
|
|
7
6
|
CODEPOINTS = Hash.new{|h, k|
|
8
7
|
h[k] = YAML.load_file(File.join(File.expand_path(File.dirname(__FILE__)), "unidecoder_data", "#{k}.yml"))
|
9
8
|
} unless defined?(CODEPOINTS)
|
10
|
-
LOCAL_CODEPOINTS = Hash.new unless defined?(LOCAL_CODEPOINTS)
|
11
9
|
|
12
10
|
class << self
|
13
11
|
# Returns string with its UTF-8 characters transliterated to ASCII ones
|
14
12
|
#
|
15
13
|
# You're probably better off just using the added String#to_ascii
|
16
14
|
def decode(string)
|
17
|
-
string.gsub(/[^\x00-\x7f]/u) do |codepoint|
|
18
|
-
if localized = local_codepoint(codepoint)
|
15
|
+
string.gsub(/[^\x00-\x00]/u) do |codepoint|
|
16
|
+
if localized = translate(codepoint)
|
19
17
|
localized
|
20
18
|
else
|
21
19
|
begin
|
@@ -47,66 +45,12 @@ module Stringex
|
|
47
45
|
"#{code_group(unpacked)}.yml (line #{grouped_point(unpacked) + 2})"
|
48
46
|
end
|
49
47
|
|
50
|
-
|
51
|
-
def localize_from(hash_or_path_to_file)
|
52
|
-
hash = if hash_or_path_to_file.is_a?(Hash)
|
53
|
-
hash_or_path_to_file
|
54
|
-
else
|
55
|
-
YAML.load_file(hash_or_path_to_file)
|
56
|
-
end
|
57
|
-
verify_local_codepoints hash
|
58
|
-
end
|
59
|
-
|
60
|
-
# Returns locale for localized transliterations
|
61
|
-
def locale
|
62
|
-
if @locale
|
63
|
-
@locale
|
64
|
-
elsif defined?(I18n)
|
65
|
-
I18n.locale
|
66
|
-
else
|
67
|
-
default_locale
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
# Sets locale for localized transliterations
|
72
|
-
def locale=(new_locale)
|
73
|
-
@locale = new_locale
|
74
|
-
end
|
75
|
-
|
76
|
-
# Returns default locale for localized transliterations. NOTE: Will set @locale as well.
|
77
|
-
def default_locale
|
78
|
-
@default_locale ||= "en"
|
79
|
-
@locale = @default_locale
|
80
|
-
end
|
81
|
-
|
82
|
-
# Sets the default locale for localized transliterations. NOTE: Will set @locale as well.
|
83
|
-
def default_locale=(new_locale)
|
84
|
-
@default_locale = new_locale
|
85
|
-
# Seems logical that @locale should be the new default
|
86
|
-
@locale = new_locale
|
87
|
-
end
|
88
|
-
|
89
|
-
# Returns the localized transliteration for a codepoint
|
90
|
-
def local_codepoint(codepoint)
|
91
|
-
locale_hash = LOCAL_CODEPOINTS[locale] || LOCAL_CODEPOINTS[locale.is_a?(Symbol) ? locale.to_s : locale.to_sym]
|
92
|
-
locale_hash && locale_hash[codepoint]
|
93
|
-
end
|
94
|
-
|
95
|
-
# Runs a block with a temporary locale setting, returning the locale to the original state when complete
|
96
|
-
def with_locale(new_locale, &block)
|
97
|
-
new_locale = default_locale if new_locale == :default
|
98
|
-
original_locale = locale
|
99
|
-
self.locale = new_locale
|
100
|
-
block.call
|
101
|
-
self.locale = original_locale
|
102
|
-
end
|
48
|
+
private
|
103
49
|
|
104
|
-
|
105
|
-
def with_default_locale(&block)
|
106
|
-
with_locale default_locale, &block
|
50
|
+
def translate(codepoint)
|
51
|
+
Localization.translate(:transliterations, codepoint)
|
107
52
|
end
|
108
53
|
|
109
|
-
private
|
110
54
|
# Returns the Unicode codepoint grouping for the given character
|
111
55
|
def code_group(unpacked_character)
|
112
56
|
"x%02x" % (unpacked_character >> 8)
|
@@ -116,51 +60,12 @@ module Stringex
|
|
116
60
|
def grouped_point(unpacked_character)
|
117
61
|
unpacked_character & 255
|
118
62
|
end
|
119
|
-
|
120
|
-
# Checks LOCAL_CODEPOINTS's Hash is in the format we expect before assigning it and raises
|
121
|
-
# instructive exception if not
|
122
|
-
def verify_local_codepoints(hash)
|
123
|
-
if !pass_check(hash)
|
124
|
-
raise ArgumentError, "LOCAL_CODEPOINTS is not correctly defined. Please see the README for more information on how to correctly format this data."
|
125
|
-
end
|
126
|
-
hash.each{|k, v| LOCAL_CODEPOINTS[k] = v}
|
127
|
-
end
|
128
|
-
|
129
|
-
def pass_check(hash)
|
130
|
-
return false if !hash.is_a?(Hash)
|
131
|
-
hash.all?{|key, value| pass_check_key_and_value_test(key, value) }
|
132
|
-
end
|
133
|
-
|
134
|
-
def pass_check_key_and_value_test(key, value)
|
135
|
-
# Fuck a duck, eh?
|
136
|
-
return false unless [Symbol, String].include?(key.class)
|
137
|
-
return false unless value.is_a?(Hash)
|
138
|
-
value.all?{|k, v| k.is_a?(String) && v.is_a?(String)}
|
139
|
-
end
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
# Provide a simpler interface for localization implementations
|
144
|
-
class << self
|
145
|
-
%w{
|
146
|
-
localize_from
|
147
|
-
locale
|
148
|
-
locale=
|
149
|
-
default_locale
|
150
|
-
default_locale=
|
151
|
-
local_codepoint
|
152
|
-
with_locale
|
153
|
-
with_default_locale
|
154
|
-
}.each do |name|
|
155
|
-
define_method name do |*args, &block|
|
156
|
-
Unidecoder.send name, *args, &block
|
157
|
-
end
|
158
63
|
end
|
159
64
|
end
|
160
65
|
end
|
161
66
|
|
162
67
|
module Stringex
|
163
|
-
module StringExtensions
|
68
|
+
module StringExtensions::PublicInstanceMethods
|
164
69
|
# Returns string with its UTF-8 characters transliterated to ASCII ones. Example:
|
165
70
|
#
|
166
71
|
# "⠋⠗⠁⠝⠉⠑".to_ascii #=> "france"
|
@@ -186,10 +186,10 @@
|
|
186
186
|
- y
|
187
187
|
- "'"
|
188
188
|
- '"'
|
189
|
-
- '`'
|
190
189
|
- "'"
|
191
|
-
- '
|
192
|
-
- '
|
190
|
+
- "'"
|
191
|
+
- "'"
|
192
|
+
- "'"
|
193
193
|
- "'"
|
194
194
|
- '?'
|
195
195
|
- '?'
|
@@ -202,14 +202,14 @@
|
|
202
202
|
- "'"
|
203
203
|
- '-'
|
204
204
|
- /
|
205
|
-
- '
|
205
|
+
- "'"
|
206
206
|
- ','
|
207
207
|
- _
|
208
208
|
- \
|
209
209
|
- /
|
210
210
|
- ':'
|
211
211
|
- .
|
212
|
-
- '
|
212
|
+
- "'"
|
213
213
|
- "'"
|
214
214
|
- '^'
|
215
215
|
- V
|
@@ -36,7 +36,7 @@
|
|
36
36
|
- n
|
37
37
|
- s
|
38
38
|
- s
|
39
|
-
- '
|
39
|
+
- "'"
|
40
40
|
- p
|
41
41
|
- p
|
42
42
|
- S
|
@@ -64,7 +64,7 @@
|
|
64
64
|
- u
|
65
65
|
- o
|
66
66
|
- ''
|
67
|
-
- '
|
67
|
+
- "'"
|
68
68
|
- "'"
|
69
69
|
- ''
|
70
70
|
- ''
|
@@ -161,7 +161,7 @@
|
|
161
161
|
- d
|
162
162
|
- t
|
163
163
|
- z
|
164
|
-
- '
|
164
|
+
- "'"
|
165
165
|
- gh
|
166
166
|
- q
|
167
167
|
- w
|
data/locales/da.yml
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# NOTE: Some translation keys make use of matches from regular expressions
|
2
|
+
# to manipulate whitespace and order. Please consult the source code for
|
3
|
+
# Stringex::Localization::ConversionExpressions to see what those
|
4
|
+
# regular expressions look like if you need to manipulate the order
|
5
|
+
# differently than the usage below.
|
6
|
+
da:
|
7
|
+
stringex:
|
8
|
+
characters:
|
9
|
+
and: og
|
10
|
+
at: snabel-a
|
11
|
+
divide: divideret med
|
12
|
+
degrees: grader
|
13
|
+
dot: \1 punktum \2
|
14
|
+
ellipsis: prik prik prik
|
15
|
+
equals: lig med
|
16
|
+
number: nummer
|
17
|
+
percent: procent
|
18
|
+
plus: plus
|
19
|
+
slash: skråstreg
|
20
|
+
star: stjerne
|
21
|
+
currencies:
|
22
|
+
dollars: \1 dollars
|
23
|
+
dollars_cents: \1 dollars \2 cents
|
24
|
+
pounds: \1 pund
|
25
|
+
pounds_pence: \1 pund \2 pence
|
26
|
+
euros: \1 euro
|
27
|
+
euros_cents: \1 euro \2 cent
|
28
|
+
yen: \1 yen
|
29
|
+
html_entities:
|
30
|
+
amp: and
|
31
|
+
cent: " cents"
|
32
|
+
copy: (c)
|
33
|
+
deg: " grader "
|
34
|
+
divide: " divideret med "
|
35
|
+
double_quote: '"'
|
36
|
+
ellipsis: "..."
|
37
|
+
en_dash: "-"
|
38
|
+
em_dash: "--"
|
39
|
+
frac14: en fjerdedel
|
40
|
+
frac12: halv
|
41
|
+
frac34: tre fjerdedele
|
42
|
+
gt: ">"
|
43
|
+
lt: <
|
44
|
+
nbsp: " "
|
45
|
+
pound: " pund "
|
46
|
+
reg: (r)
|
47
|
+
single_quote: "'"
|
48
|
+
times: x
|
49
|
+
trade: (tm)
|
50
|
+
yen: " yen "
|
51
|
+
vulgar_fractions:
|
52
|
+
half: halv
|
53
|
+
one_third: en tredjedel
|
54
|
+
two_thirds: to tredjedele
|
55
|
+
one_fourth: en fjerdedel
|
56
|
+
three_fourths: tre fjerdedele
|
57
|
+
one_fifth: en femtedel
|
58
|
+
two_fifths: to femtedele
|
59
|
+
three_fifths: tre femtedele
|
60
|
+
four_fifths: fire femtedele
|
61
|
+
one_sixth: en sjettedel
|
62
|
+
five_sixths: fem sjettedele
|
63
|
+
one_eighth: en ottendedel
|
64
|
+
three_eighths: tre ottendedele
|
65
|
+
five_eighths: fem ottendedele
|
66
|
+
seven_eighths: syv ottendedele
|
67
|
+
transliterations:
|
68
|
+
Æ: AE
|
69
|
+
Ø: OE
|
70
|
+
Å: AA
|
71
|
+
æ: ae
|
72
|
+
ø: oe
|
73
|
+
å: aa
|
data/locales/en.yml
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# NOTE: Some translation keys make use of matches from regular expressions
|
2
|
+
# to manipulate whitespace and order. Please consult the source code for
|
3
|
+
# Stringex::Localization::ConversionExpressions to see what those
|
4
|
+
# regular expressions look like if you need to manipulate the order
|
5
|
+
# differently than the usage below.
|
6
|
+
en:
|
7
|
+
stringex:
|
8
|
+
characters:
|
9
|
+
and: and
|
10
|
+
at: at
|
11
|
+
divide: divided by
|
12
|
+
degrees: degrees
|
13
|
+
dot: \1 dot \2
|
14
|
+
ellipsis: dot dot dot
|
15
|
+
equals: equals
|
16
|
+
number: number
|
17
|
+
percent: percent
|
18
|
+
plus: plus
|
19
|
+
slash: slash
|
20
|
+
star: star
|
21
|
+
currencies:
|
22
|
+
dollars: \1 dollars
|
23
|
+
dollars_cents: \1 dollars \2 cents
|
24
|
+
pounds: \1 pounds
|
25
|
+
pounds_pence: \1 pounds \2 pence
|
26
|
+
euros: \1 euros
|
27
|
+
euros_cents: \1 euros \2 cents
|
28
|
+
yen: \1 yen
|
29
|
+
html_entities:
|
30
|
+
amp: and
|
31
|
+
cent: " cents"
|
32
|
+
copy: (c)
|
33
|
+
deg: " degrees "
|
34
|
+
divide: " divided by "
|
35
|
+
double_quote: '"'
|
36
|
+
ellipsis: "..."
|
37
|
+
en_dash: "-"
|
38
|
+
em_dash: "--"
|
39
|
+
frac14: one fourth
|
40
|
+
frac12: half
|
41
|
+
frac34: three fourths
|
42
|
+
gt: ">"
|
43
|
+
lt: <
|
44
|
+
nbsp: " "
|
45
|
+
pound: " pounds "
|
46
|
+
reg: (r)
|
47
|
+
single_quote: "'"
|
48
|
+
times: x
|
49
|
+
trade: (tm)
|
50
|
+
yen: " yen "
|
51
|
+
vulgar_fractions:
|
52
|
+
half: half
|
53
|
+
one_third: one third
|
54
|
+
two_thirds: two thirds
|
55
|
+
one_fourth: one fourth
|
56
|
+
three_fourths: three fourths
|
57
|
+
one_fifth: one fifth
|
58
|
+
two_fifths: two fifths
|
59
|
+
three_fifths: three fifths
|
60
|
+
four_fifths: four fifths
|
61
|
+
one_sixth: one sixth
|
62
|
+
five_sixths: five sixths
|
63
|
+
one_eighth: one eighth
|
64
|
+
three_eighths: three eighths
|
65
|
+
five_eighths: five eighths
|
66
|
+
seven_eighths: seven eighths
|