stringex 1.5.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +74 -0
- data/README.rdoc +22 -1
- data/Rakefile +46 -223
- data/VERSION +1 -0
- data/init.rb +1 -0
- data/lib/stringex.rb +11 -3
- data/lib/stringex/acts_as_url.rb +49 -97
- data/lib/stringex/acts_as_url/adapter.rb +26 -0
- data/lib/stringex/acts_as_url/adapter/active_record.rb +23 -0
- data/lib/stringex/acts_as_url/adapter/base.rb +188 -0
- data/lib/stringex/acts_as_url/adapter/data_mapper.rb +67 -0
- data/lib/stringex/acts_as_url/adapter/mongoid.rb +36 -0
- data/lib/stringex/configuration.rb +4 -0
- data/lib/stringex/configuration/acts_as_url.rb +44 -0
- data/lib/stringex/configuration/base.rb +58 -0
- data/lib/stringex/configuration/configurator.rb +25 -0
- data/lib/stringex/configuration/string_extensions.rb +19 -0
- data/lib/stringex/localization.rb +98 -0
- data/lib/stringex/localization/backend/i18n.rb +53 -0
- data/lib/stringex/localization/backend/internal.rb +51 -0
- data/lib/stringex/localization/conversion_expressions.rb +148 -0
- data/lib/stringex/localization/converter.rb +121 -0
- data/lib/stringex/localization/default_conversions.rb +88 -0
- data/lib/stringex/rails/railtie.rb +10 -0
- data/lib/stringex/string_extensions.rb +153 -208
- data/lib/stringex/unidecoder.rb +6 -101
- data/lib/stringex/unidecoder_data/x00.yml +1 -1
- data/lib/stringex/unidecoder_data/x02.yml +5 -5
- data/lib/stringex/unidecoder_data/x05.yml +1 -1
- data/lib/stringex/unidecoder_data/x06.yml +1 -1
- data/lib/stringex/unidecoder_data/x07.yml +3 -3
- data/lib/stringex/unidecoder_data/x09.yml +1 -1
- data/lib/stringex/unidecoder_data/x0e.yml +2 -2
- data/lib/stringex/unidecoder_data/x1f.yml +2 -2
- data/lib/stringex/unidecoder_data/x20.yml +1 -1
- data/lib/stringex/unidecoder_data/xfb.yml +1 -1
- data/lib/stringex/unidecoder_data/xff.yml +1 -1
- data/lib/stringex/version.rb +8 -0
- data/locales/da.yml +73 -0
- data/locales/en.yml +66 -0
- data/stringex.gemspec +77 -18
- data/test/acts_as_url/adapter/active_record.rb +72 -0
- data/test/acts_as_url/adapter/data_mapper.rb +82 -0
- data/test/acts_as_url/adapter/mongoid.rb +73 -0
- data/test/acts_as_url_configuration_test.rb +51 -0
- data/test/acts_as_url_integration_test.rb +271 -0
- data/test/localization/da_test.rb +117 -0
- data/test/localization/default_test.rb +113 -0
- data/test/localization/en_test.rb +117 -0
- data/test/localization_test.rb +123 -0
- data/test/redcloth_to_html_test.rb +37 -0
- data/test/string_extensions_test.rb +59 -91
- data/test/test_helper.rb +2 -0
- data/test/unicode_point_suite/basic_greek_test.rb +113 -0
- data/test/unicode_point_suite/basic_latin_test.rb +142 -0
- data/test/unicode_point_suite/codepoint_test_helper.rb +32 -0
- data/test/unidecoder/bad_localization.yml +1 -0
- data/test/unidecoder/localization.yml +4 -0
- data/test/unidecoder_test.rb +3 -5
- metadata +145 -37
- data/test/acts_as_url_test.rb +0 -272
data/lib/stringex/unidecoder.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require "yaml"
|
3
2
|
|
4
3
|
module Stringex
|
5
4
|
module Unidecoder
|
@@ -7,15 +6,14 @@ module Stringex
|
|
7
6
|
CODEPOINTS = Hash.new{|h, k|
|
8
7
|
h[k] = YAML.load_file(File.join(File.expand_path(File.dirname(__FILE__)), "unidecoder_data", "#{k}.yml"))
|
9
8
|
} unless defined?(CODEPOINTS)
|
10
|
-
LOCAL_CODEPOINTS = Hash.new unless defined?(LOCAL_CODEPOINTS)
|
11
9
|
|
12
10
|
class << self
|
13
11
|
# Returns string with its UTF-8 characters transliterated to ASCII ones
|
14
12
|
#
|
15
13
|
# You're probably better off just using the added String#to_ascii
|
16
14
|
def decode(string)
|
17
|
-
string.gsub(/[^\x00-\
|
18
|
-
if localized =
|
15
|
+
string.gsub(/[^\x00-\x00]/u) do |codepoint|
|
16
|
+
if localized = translate(codepoint)
|
19
17
|
localized
|
20
18
|
else
|
21
19
|
begin
|
@@ -47,66 +45,12 @@ module Stringex
|
|
47
45
|
"#{code_group(unpacked)}.yml (line #{grouped_point(unpacked) + 2})"
|
48
46
|
end
|
49
47
|
|
50
|
-
|
51
|
-
def localize_from(hash_or_path_to_file)
|
52
|
-
hash = if hash_or_path_to_file.is_a?(Hash)
|
53
|
-
hash_or_path_to_file
|
54
|
-
else
|
55
|
-
YAML.load_file(hash_or_path_to_file)
|
56
|
-
end
|
57
|
-
verify_local_codepoints hash
|
58
|
-
end
|
59
|
-
|
60
|
-
# Returns locale for localized transliterations
|
61
|
-
def locale
|
62
|
-
if @locale
|
63
|
-
@locale
|
64
|
-
elsif defined?(I18n)
|
65
|
-
I18n.locale
|
66
|
-
else
|
67
|
-
default_locale
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
# Sets locale for localized transliterations
|
72
|
-
def locale=(new_locale)
|
73
|
-
@locale = new_locale
|
74
|
-
end
|
75
|
-
|
76
|
-
# Returns default locale for localized transliterations. NOTE: Will set @locale as well.
|
77
|
-
def default_locale
|
78
|
-
@default_locale ||= "en"
|
79
|
-
@locale = @default_locale
|
80
|
-
end
|
81
|
-
|
82
|
-
# Sets the default locale for localized transliterations. NOTE: Will set @locale as well.
|
83
|
-
def default_locale=(new_locale)
|
84
|
-
@default_locale = new_locale
|
85
|
-
# Seems logical that @locale should be the new default
|
86
|
-
@locale = new_locale
|
87
|
-
end
|
88
|
-
|
89
|
-
# Returns the localized transliteration for a codepoint
|
90
|
-
def local_codepoint(codepoint)
|
91
|
-
locale_hash = LOCAL_CODEPOINTS[locale] || LOCAL_CODEPOINTS[locale.is_a?(Symbol) ? locale.to_s : locale.to_sym]
|
92
|
-
locale_hash && locale_hash[codepoint]
|
93
|
-
end
|
94
|
-
|
95
|
-
# Runs a block with a temporary locale setting, returning the locale to the original state when complete
|
96
|
-
def with_locale(new_locale, &block)
|
97
|
-
new_locale = default_locale if new_locale == :default
|
98
|
-
original_locale = locale
|
99
|
-
self.locale = new_locale
|
100
|
-
block.call
|
101
|
-
self.locale = original_locale
|
102
|
-
end
|
48
|
+
private
|
103
49
|
|
104
|
-
|
105
|
-
|
106
|
-
with_locale default_locale, &block
|
50
|
+
def translate(codepoint)
|
51
|
+
Localization.translate(:transliterations, codepoint)
|
107
52
|
end
|
108
53
|
|
109
|
-
private
|
110
54
|
# Returns the Unicode codepoint grouping for the given character
|
111
55
|
def code_group(unpacked_character)
|
112
56
|
"x%02x" % (unpacked_character >> 8)
|
@@ -116,51 +60,12 @@ module Stringex
|
|
116
60
|
def grouped_point(unpacked_character)
|
117
61
|
unpacked_character & 255
|
118
62
|
end
|
119
|
-
|
120
|
-
# Checks LOCAL_CODEPOINTS's Hash is in the format we expect before assigning it and raises
|
121
|
-
# instructive exception if not
|
122
|
-
def verify_local_codepoints(hash)
|
123
|
-
if !pass_check(hash)
|
124
|
-
raise ArgumentError, "LOCAL_CODEPOINTS is not correctly defined. Please see the README for more information on how to correctly format this data."
|
125
|
-
end
|
126
|
-
hash.each{|k, v| LOCAL_CODEPOINTS[k] = v}
|
127
|
-
end
|
128
|
-
|
129
|
-
def pass_check(hash)
|
130
|
-
return false if !hash.is_a?(Hash)
|
131
|
-
hash.all?{|key, value| pass_check_key_and_value_test(key, value) }
|
132
|
-
end
|
133
|
-
|
134
|
-
def pass_check_key_and_value_test(key, value)
|
135
|
-
# Fuck a duck, eh?
|
136
|
-
return false unless [Symbol, String].include?(key.class)
|
137
|
-
return false unless value.is_a?(Hash)
|
138
|
-
value.all?{|k, v| k.is_a?(String) && v.is_a?(String)}
|
139
|
-
end
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
# Provide a simpler interface for localization implementations
|
144
|
-
class << self
|
145
|
-
%w{
|
146
|
-
localize_from
|
147
|
-
locale
|
148
|
-
locale=
|
149
|
-
default_locale
|
150
|
-
default_locale=
|
151
|
-
local_codepoint
|
152
|
-
with_locale
|
153
|
-
with_default_locale
|
154
|
-
}.each do |name|
|
155
|
-
define_method name do |*args, &block|
|
156
|
-
Unidecoder.send name, *args, &block
|
157
|
-
end
|
158
63
|
end
|
159
64
|
end
|
160
65
|
end
|
161
66
|
|
162
67
|
module Stringex
|
163
|
-
module StringExtensions
|
68
|
+
module StringExtensions::PublicInstanceMethods
|
164
69
|
# Returns string with its UTF-8 characters transliterated to ASCII ones. Example:
|
165
70
|
#
|
166
71
|
# "⠋⠗⠁⠝⠉⠑".to_ascii #=> "france"
|
@@ -186,10 +186,10 @@
|
|
186
186
|
- y
|
187
187
|
- "'"
|
188
188
|
- '"'
|
189
|
-
- '`'
|
190
189
|
- "'"
|
191
|
-
- '
|
192
|
-
- '
|
190
|
+
- "'"
|
191
|
+
- "'"
|
192
|
+
- "'"
|
193
193
|
- "'"
|
194
194
|
- '?'
|
195
195
|
- '?'
|
@@ -202,14 +202,14 @@
|
|
202
202
|
- "'"
|
203
203
|
- '-'
|
204
204
|
- /
|
205
|
-
- '
|
205
|
+
- "'"
|
206
206
|
- ','
|
207
207
|
- _
|
208
208
|
- \
|
209
209
|
- /
|
210
210
|
- ':'
|
211
211
|
- .
|
212
|
-
- '
|
212
|
+
- "'"
|
213
213
|
- "'"
|
214
214
|
- '^'
|
215
215
|
- V
|
@@ -36,7 +36,7 @@
|
|
36
36
|
- n
|
37
37
|
- s
|
38
38
|
- s
|
39
|
-
- '
|
39
|
+
- "'"
|
40
40
|
- p
|
41
41
|
- p
|
42
42
|
- S
|
@@ -64,7 +64,7 @@
|
|
64
64
|
- u
|
65
65
|
- o
|
66
66
|
- ''
|
67
|
-
- '
|
67
|
+
- "'"
|
68
68
|
- "'"
|
69
69
|
- ''
|
70
70
|
- ''
|
@@ -161,7 +161,7 @@
|
|
161
161
|
- d
|
162
162
|
- t
|
163
163
|
- z
|
164
|
-
- '
|
164
|
+
- "'"
|
165
165
|
- gh
|
166
166
|
- q
|
167
167
|
- w
|
data/locales/da.yml
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# NOTE: Some translation keys make use of matches from regular expressions
|
2
|
+
# to manipulate whitespace and order. Please consult the source code for
|
3
|
+
# Stringex::Localization::ConversionExpressions to see what those
|
4
|
+
# regular expressions look like if you need to manipulate the order
|
5
|
+
# differently than the usage below.
|
6
|
+
da:
|
7
|
+
stringex:
|
8
|
+
characters:
|
9
|
+
and: og
|
10
|
+
at: snabel-a
|
11
|
+
divide: divideret med
|
12
|
+
degrees: grader
|
13
|
+
dot: \1 punktum \2
|
14
|
+
ellipsis: prik prik prik
|
15
|
+
equals: lig med
|
16
|
+
number: nummer
|
17
|
+
percent: procent
|
18
|
+
plus: plus
|
19
|
+
slash: skråstreg
|
20
|
+
star: stjerne
|
21
|
+
currencies:
|
22
|
+
dollars: \1 dollars
|
23
|
+
dollars_cents: \1 dollars \2 cents
|
24
|
+
pounds: \1 pund
|
25
|
+
pounds_pence: \1 pund \2 pence
|
26
|
+
euros: \1 euro
|
27
|
+
euros_cents: \1 euro \2 cent
|
28
|
+
yen: \1 yen
|
29
|
+
html_entities:
|
30
|
+
amp: and
|
31
|
+
cent: " cents"
|
32
|
+
copy: (c)
|
33
|
+
deg: " grader "
|
34
|
+
divide: " divideret med "
|
35
|
+
double_quote: '"'
|
36
|
+
ellipsis: "..."
|
37
|
+
en_dash: "-"
|
38
|
+
em_dash: "--"
|
39
|
+
frac14: en fjerdedel
|
40
|
+
frac12: halv
|
41
|
+
frac34: tre fjerdedele
|
42
|
+
gt: ">"
|
43
|
+
lt: <
|
44
|
+
nbsp: " "
|
45
|
+
pound: " pund "
|
46
|
+
reg: (r)
|
47
|
+
single_quote: "'"
|
48
|
+
times: x
|
49
|
+
trade: (tm)
|
50
|
+
yen: " yen "
|
51
|
+
vulgar_fractions:
|
52
|
+
half: halv
|
53
|
+
one_third: en tredjedel
|
54
|
+
two_thirds: to tredjedele
|
55
|
+
one_fourth: en fjerdedel
|
56
|
+
three_fourths: tre fjerdedele
|
57
|
+
one_fifth: en femtedel
|
58
|
+
two_fifths: to femtedele
|
59
|
+
three_fifths: tre femtedele
|
60
|
+
four_fifths: fire femtedele
|
61
|
+
one_sixth: en sjettedel
|
62
|
+
five_sixths: fem sjettedele
|
63
|
+
one_eighth: en ottendedel
|
64
|
+
three_eighths: tre ottendedele
|
65
|
+
five_eighths: fem ottendedele
|
66
|
+
seven_eighths: syv ottendedele
|
67
|
+
transliterations:
|
68
|
+
Æ: AE
|
69
|
+
Ø: OE
|
70
|
+
Å: AA
|
71
|
+
æ: ae
|
72
|
+
ø: oe
|
73
|
+
å: aa
|
data/locales/en.yml
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# NOTE: Some translation keys make use of matches from regular expressions
|
2
|
+
# to manipulate whitespace and order. Please consult the source code for
|
3
|
+
# Stringex::Localization::ConversionExpressions to see what those
|
4
|
+
# regular expressions look like if you need to manipulate the order
|
5
|
+
# differently than the usage below.
|
6
|
+
en:
|
7
|
+
stringex:
|
8
|
+
characters:
|
9
|
+
and: and
|
10
|
+
at: at
|
11
|
+
divide: divided by
|
12
|
+
degrees: degrees
|
13
|
+
dot: \1 dot \2
|
14
|
+
ellipsis: dot dot dot
|
15
|
+
equals: equals
|
16
|
+
number: number
|
17
|
+
percent: percent
|
18
|
+
plus: plus
|
19
|
+
slash: slash
|
20
|
+
star: star
|
21
|
+
currencies:
|
22
|
+
dollars: \1 dollars
|
23
|
+
dollars_cents: \1 dollars \2 cents
|
24
|
+
pounds: \1 pounds
|
25
|
+
pounds_pence: \1 pounds \2 pence
|
26
|
+
euros: \1 euros
|
27
|
+
euros_cents: \1 euros \2 cents
|
28
|
+
yen: \1 yen
|
29
|
+
html_entities:
|
30
|
+
amp: and
|
31
|
+
cent: " cents"
|
32
|
+
copy: (c)
|
33
|
+
deg: " degrees "
|
34
|
+
divide: " divided by "
|
35
|
+
double_quote: '"'
|
36
|
+
ellipsis: "..."
|
37
|
+
en_dash: "-"
|
38
|
+
em_dash: "--"
|
39
|
+
frac14: one fourth
|
40
|
+
frac12: half
|
41
|
+
frac34: three fourths
|
42
|
+
gt: ">"
|
43
|
+
lt: <
|
44
|
+
nbsp: " "
|
45
|
+
pound: " pounds "
|
46
|
+
reg: (r)
|
47
|
+
single_quote: "'"
|
48
|
+
times: x
|
49
|
+
trade: (tm)
|
50
|
+
yen: " yen "
|
51
|
+
vulgar_fractions:
|
52
|
+
half: half
|
53
|
+
one_third: one third
|
54
|
+
two_thirds: two thirds
|
55
|
+
one_fourth: one fourth
|
56
|
+
three_fourths: three fourths
|
57
|
+
one_fifth: one fifth
|
58
|
+
two_fifths: two fifths
|
59
|
+
three_fifths: three fifths
|
60
|
+
four_fifths: four fifths
|
61
|
+
one_sixth: one sixth
|
62
|
+
five_sixths: five sixths
|
63
|
+
one_eighth: one eighth
|
64
|
+
three_eighths: three eighths
|
65
|
+
five_eighths: five eighths
|
66
|
+
seven_eighths: seven eighths
|