stringex 2.0.11 → 2.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +6 -14
- data/Gemfile +45 -14
- data/MIT-LICENSE +1 -1
- data/README.md +162 -0
- data/Rakefile +5 -31
- data/VERSION +1 -1
- data/lib/stringex/acts_as_url/adapter/active_record.rb +1 -1
- data/lib/stringex/acts_as_url/adapter/base.rb +37 -5
- data/lib/stringex/acts_as_url/adapter/data_mapper.rb +3 -7
- data/lib/stringex/acts_as_url/adapter/mongoid.rb +2 -2
- data/lib/stringex/acts_as_url.rb +11 -4
- data/lib/stringex/configuration/acts_as_url.rb +15 -10
- data/lib/stringex/configuration/string_extensions.rb +7 -6
- data/lib/stringex/core_ext.rb +10 -0
- data/lib/stringex/localization/backend/i18n.rb +16 -3
- data/lib/stringex/localization/conversion_expressions.rb +64 -60
- data/lib/stringex/localization/converter.rb +8 -4
- data/lib/stringex/localization/default_conversions.rb +56 -55
- data/lib/stringex/localization.rb +2 -2
- data/lib/stringex/string_extensions.rb +34 -6
- data/lib/stringex/unidecoder.rb +26 -22
- data/lib/stringex/unidecoder_data/x00.yml +1 -1
- data/lib/stringex/unidecoder_data/x03.yml +2 -2
- data/lib/stringex/unidecoder_data/x05.yml +1 -1
- data/lib/stringex/unidecoder_data/x07.yml +1 -1
- data/lib/stringex/unidecoder_data/x09.yml +2 -2
- data/lib/stringex/unidecoder_data/x0a.yml +2 -2
- data/lib/stringex/unidecoder_data/x0f.yml +1 -1
- data/lib/stringex/unidecoder_data/x12.yml +1 -1
- data/lib/stringex/unidecoder_data/x13.yml +1 -1
- data/lib/stringex/unidecoder_data/x14.yml +2 -2
- data/lib/stringex/unidecoder_data/x15.yml +1 -1
- data/lib/stringex/unidecoder_data/x16.yml +2 -2
- data/lib/stringex/unidecoder_data/x18.yml +2 -2
- data/lib/stringex/unidecoder_data/x20.yml +4 -4
- data/lib/stringex/unidecoder_data/x21.yml +28 -28
- data/lib/stringex/unidecoder_data/x25.yml +16 -16
- data/lib/stringex/unidecoder_data/x28.yml +1 -1
- data/lib/stringex/unidecoder_data/x30.yml +2 -2
- data/lib/stringex/unidecoder_data/x32.yml +1 -1
- data/lib/stringex/unidecoder_data/xa1.yml +1 -1
- data/lib/stringex/unidecoder_data/xb1.yml +1 -1
- data/lib/stringex/unidecoder_data/xc6.yml +2 -2
- data/lib/stringex/unidecoder_data/xfe.yml +3 -3
- data/lib/stringex/unidecoder_data/xff.yml +5 -5
- data/lib/stringex.rb +1 -2
- data/locales/da.yml +2 -1
- data/locales/de.yml +1 -0
- data/locales/en.yml +1 -0
- data/locales/fr.yml +72 -0
- data/locales/nb.yml +76 -0
- data/locales/nl.yml +69 -0
- data/locales/no.yml +76 -0
- data/locales/pl.yml +88 -0
- data/locales/pt-BR.yml +1 -0
- data/locales/ru.yml +1 -0
- data/locales/sv.yml +76 -0
- data/stringex.gemspec +38 -59
- data/test/unit/acts_as_url/adapter/{active_record.rb → activerecord.rb} +4 -5
- data/test/unit/acts_as_url/adapter/{data_mapper.rb → datamapper.rb} +2 -3
- data/test/unit/acts_as_url/adapter/mongoid.rb +18 -11
- data/test/unit/acts_as_url_configuration_test.rb +2 -2
- data/test/unit/acts_as_url_integration_test.rb +169 -73
- data/test/unit/localization/da_test.rb +3 -2
- data/test/unit/localization/de_test.rb +1 -0
- data/test/unit/localization/default_test.rb +2 -0
- data/test/unit/localization/en_test.rb +1 -0
- data/test/unit/localization/fr_test.rb +118 -0
- data/test/unit/localization/nl_test.rb +118 -0
- data/test/unit/localization/pl_test.rb +118 -0
- data/test/unit/localization/pt_br_test.rb +118 -0
- data/test/unit/localization/ru_test.rb +2 -1
- data/test/unit/localization/sv_test.rb +118 -0
- data/test/unit/localization_test.rb +39 -19
- data/test/unit/string_extensions_test.rb +60 -6
- data/test/unit/unicode_point_suite/basic_greek_test.rb +1 -1
- data/test/unit/unicode_point_suite/basic_latin_test.rb +3 -1
- data/test/unit/unidecoder_test.rb +2 -1
- metadata +47 -94
- data/README.rdoc +0 -131
- data/test/unit/redcloth_to_html_test.rb +0 -37
@@ -3,43 +3,44 @@
|
|
3
3
|
module Stringex
|
4
4
|
module Localization
|
5
5
|
module ConversionExpressions
|
6
|
-
ABBREVIATION = /(\s|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|$))/
|
6
|
+
ABBREVIATION = /(\s|\(|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|\)|$))/
|
7
7
|
|
8
8
|
ACCENTED_HTML_ENTITY = /&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/
|
9
9
|
|
10
10
|
APOSTROPHE = /(^|[[:alpha:]])'|`([[:alpha:]]|$)/
|
11
11
|
|
12
12
|
CHARACTERS = {
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
16
|
-
:
|
17
|
-
:
|
18
|
-
:
|
19
|
-
:
|
20
|
-
:
|
21
|
-
:
|
22
|
-
:
|
23
|
-
:
|
24
|
-
:
|
13
|
+
and: /\s*&\s*/,
|
14
|
+
at: /\s*@\s*/,
|
15
|
+
degrees: /\s*°\s*/,
|
16
|
+
divide: /\s*÷\s*/,
|
17
|
+
dot: /(\S|^)\.(\S)/,
|
18
|
+
ellipsis: /\s*\.{3,}\s*/,
|
19
|
+
equals: /\s*=\s*/,
|
20
|
+
number: /\s*#/,
|
21
|
+
percent: /\s*%\s*/,
|
22
|
+
plus: /\s*\+\s*/,
|
23
|
+
slash: /\s*(\\|\/|／)\s*/,
|
24
|
+
star: /\s*\*\s*/,
|
25
25
|
}
|
26
26
|
|
27
27
|
# Things that just get converted to spaces
|
28
|
-
CLEANUP_CHARACTERS = /[\.,:;(){}\[\]\?!\^'ʼ"_\|<>]/
|
28
|
+
CLEANUP_CHARACTERS = /[\.,:;(){}\[\]\?!\^'ʼ"`~_\|<>]/
|
29
29
|
CLEANUP_HTML_ENTITIES = /&[^;]+;/
|
30
30
|
|
31
31
|
CURRENCIES_SUPPORTED_SIMPLE = {
|
32
|
-
:
|
33
|
-
:
|
34
|
-
:
|
35
|
-
:
|
36
|
-
:
|
32
|
+
generic: /¤/,
|
33
|
+
dollars: /\$/,
|
34
|
+
euros: /€/,
|
35
|
+
pounds: /£/,
|
36
|
+
yen: /¥/,
|
37
|
+
reais: /R\$/
|
37
38
|
}
|
38
39
|
CURRENCIES_SUPPORTED_COMPLEX = {
|
39
|
-
:
|
40
|
-
:
|
41
|
-
:
|
42
|
-
:
|
40
|
+
dollars: :dollars_cents,
|
41
|
+
euros: :euros_cents,
|
42
|
+
pounds: :pounds_pence,
|
43
|
+
reais: :reais_cents
|
43
44
|
}
|
44
45
|
CURRENCIES_SUPPORTED = Regexp.new(CURRENCIES_SUPPORTED_SIMPLE.values.join('|'))
|
45
46
|
CURRENCIES_SIMPLE = CURRENCIES_SUPPORTED_SIMPLE.inject({}) do |hash, content|
|
@@ -60,27 +61,27 @@ module Stringex
|
|
60
61
|
|
61
62
|
HTML_ENTITIES = Proc.new(){
|
62
63
|
base = {
|
63
|
-
:
|
64
|
-
:
|
65
|
-
:
|
66
|
-
:
|
67
|
-
:
|
68
|
-
:
|
69
|
-
:
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
73
|
-
:
|
74
|
-
:
|
75
|
-
:
|
76
|
-
:
|
77
|
-
:
|
78
|
-
:
|
79
|
-
:
|
80
|
-
:
|
81
|
-
:
|
82
|
-
:
|
83
|
-
:
|
64
|
+
amp: %w{#38 amp},
|
65
|
+
cent: %w{#162 cent},
|
66
|
+
copy: %w{#169 copy},
|
67
|
+
deg: %w{#176 deg},
|
68
|
+
divide: %w{#247 divide},
|
69
|
+
double_quote: %w{#34 #822[012] quot ldquo rdquo dbquo},
|
70
|
+
ellipsis: %w{#8230 hellip},
|
71
|
+
en_dash: %w{#8211 ndash},
|
72
|
+
em_dash: %w{#8212 mdash},
|
73
|
+
frac14: %w{#188 frac14},
|
74
|
+
frac12: %w{#189 frac12},
|
75
|
+
frac34: %w{#190 frac34},
|
76
|
+
gt: %w{#62 gt},
|
77
|
+
lt: %w{#60 lt},
|
78
|
+
nbsp: %w{#160 nbsp},
|
79
|
+
pound: %w{#163 pound},
|
80
|
+
reg: %w{#174 reg},
|
81
|
+
single_quote: %w{#39 #821[678] apos lsquo rsquo sbquo},
|
82
|
+
times: %w{#215 times},
|
83
|
+
trade: %w{#8482 trade},
|
84
|
+
yen: %w{#165 yen},
|
84
85
|
}
|
85
86
|
base.inject({}) do |hash, content|
|
86
87
|
key, expression = content
|
@@ -90,7 +91,7 @@ module Stringex
|
|
90
91
|
}.call
|
91
92
|
|
92
93
|
HTML_TAG = Proc.new(){
|
93
|
-
name = /[\w
|
94
|
+
name = /[\w:-]+/
|
94
95
|
value = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
|
95
96
|
attr = /(#{name}(\s*=\s*#{value})?)/
|
96
97
|
/<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
|
@@ -102,23 +103,25 @@ module Stringex
|
|
102
103
|
/…/ => "...",
|
103
104
|
}
|
104
105
|
|
106
|
+
UNREADABLE_CONTROL_CHARACTERS = /[[:cntrl:]]/
|
107
|
+
|
105
108
|
# Ordered by denominator then numerator of the value
|
106
109
|
VULGAR_FRACTIONS = {
|
107
|
-
:
|
108
|
-
:
|
109
|
-
:
|
110
|
-
:
|
111
|
-
:
|
112
|
-
:
|
113
|
-
:
|
114
|
-
:
|
115
|
-
:
|
116
|
-
:
|
117
|
-
:
|
118
|
-
:
|
119
|
-
:
|
120
|
-
:
|
121
|
-
:
|
110
|
+
half: /(&#189;|&frac12;|½)/,
|
111
|
+
one_third: /(&#8531;|⅓)/,
|
112
|
+
two_thirds: /(&#8532;|⅔)/,
|
113
|
+
one_fourth: /(&#188;|&frac14;|¼)/,
|
114
|
+
three_fourths: /(&#190;|&frac34;|¾)/,
|
115
|
+
one_fifth: /(&#8533;|⅕)/,
|
116
|
+
two_fifths: /(&#8534;|⅖)/,
|
117
|
+
three_fifths: /(&#8535;|⅗)/,
|
118
|
+
four_fifths: /(&#8536;|⅘)/,
|
119
|
+
one_sixth: /(&#8537;|⅙)/,
|
120
|
+
five_sixths: /(&#8538;|⅚)/,
|
121
|
+
one_eighth: /(&#8539;|⅛)/,
|
122
|
+
three_eighths: /(&#8540;|⅜)/,
|
123
|
+
five_eighths: /(&#8541;|⅝)/,
|
124
|
+
seven_eighths: /(&#8542;|⅞)/,
|
122
125
|
}
|
123
126
|
|
124
127
|
WHITESPACE = /\s+/
|
@@ -137,6 +140,7 @@ module Stringex
|
|
137
140
|
html_entities
|
138
141
|
html_tag
|
139
142
|
smart_punctuation
|
143
|
+
unreadable_control_characters
|
140
144
|
vulgar_fractions
|
141
145
|
whitespace
|
142
146
|
}.each do |conversion_type|
|
@@ -37,7 +37,7 @@ module Stringex
|
|
37
37
|
end
|
38
38
|
|
39
39
|
def normalize_currency!
|
40
|
-
string.gsub!
|
40
|
+
string.gsub!(/(\d+),(\d+)/, '\1\2')
|
41
41
|
end
|
42
42
|
|
43
43
|
def smart_strip!
|
@@ -59,7 +59,11 @@ module Stringex
|
|
59
59
|
end
|
60
60
|
end
|
61
61
|
|
62
|
-
|
62
|
+
protected
|
63
|
+
|
64
|
+
def unreadable_control_characters
|
65
|
+
string.gsub! expressions.unreadable_control_characters, ''
|
66
|
+
end
|
63
67
|
|
64
68
|
def abbreviations
|
65
69
|
string.gsub! expressions.abbreviation do |x|
|
@@ -107,7 +111,7 @@ module Stringex
|
|
107
111
|
end
|
108
112
|
end
|
109
113
|
|
110
|
-
|
114
|
+
private
|
111
115
|
|
112
116
|
def expressions
|
113
117
|
ConversionExpressions
|
@@ -122,4 +126,4 @@ module Stringex
|
|
122
126
|
end
|
123
127
|
end
|
124
128
|
end
|
125
|
-
end
|
129
|
+
end
|
@@ -4,76 +4,77 @@ module Stringex
|
|
4
4
|
module Localization
|
5
5
|
module DefaultConversions
|
6
6
|
CHARACTERS = {
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
16
|
-
:
|
17
|
-
:
|
18
|
-
:
|
7
|
+
and: "and",
|
8
|
+
at: "at",
|
9
|
+
degrees: "degrees",
|
10
|
+
divide: "divided by",
|
11
|
+
dot: '\1 dot \2',
|
12
|
+
ellipsis: "dot dot dot",
|
13
|
+
equals: "equals",
|
14
|
+
number: "number",
|
15
|
+
percent: "percent",
|
16
|
+
plus: "plus",
|
17
|
+
slash: "slash",
|
18
|
+
star: "star",
|
19
19
|
}
|
20
20
|
|
21
21
|
CURRENCIES_SIMPLE = {
|
22
|
-
:
|
23
|
-
:
|
24
|
-
:
|
25
|
-
:
|
22
|
+
generic: '\1 dollars',
|
23
|
+
dollars: '\1 dollars',
|
24
|
+
euros: '\1 euros',
|
25
|
+
pounds: '\1 pounds',
|
26
|
+
yen: '\1 yen',
|
26
27
|
}
|
27
28
|
CURRENCIES_COMPLEX = {
|
28
|
-
:
|
29
|
-
:
|
30
|
-
:
|
29
|
+
dollars_cents: '\1 dollars \2 cents',
|
30
|
+
euros_cents: '\1 euros \2 cents',
|
31
|
+
pounds_pence: '\1 pounds \2 pence',
|
31
32
|
}
|
32
33
|
CURRENCIES = CURRENCIES_SIMPLE.merge(CURRENCIES_COMPLEX)
|
33
34
|
|
34
35
|
HTML_ENTITIES = {
|
35
|
-
:
|
36
|
-
:
|
37
|
-
:
|
38
|
-
:
|
39
|
-
:
|
40
|
-
:
|
41
|
-
:
|
42
|
-
:
|
43
|
-
:
|
44
|
-
:
|
45
|
-
:
|
46
|
-
:
|
47
|
-
:
|
48
|
-
:
|
49
|
-
:
|
50
|
-
:
|
51
|
-
:
|
52
|
-
:
|
53
|
-
:
|
54
|
-
:
|
55
|
-
:
|
36
|
+
amp: "and",
|
37
|
+
cent: " cents",
|
38
|
+
copy: "(c)",
|
39
|
+
deg: " degrees ",
|
40
|
+
divide: " divided by ",
|
41
|
+
double_quote: '"',
|
42
|
+
ellipsis: "...",
|
43
|
+
en_dash: "-",
|
44
|
+
em_dash: "--",
|
45
|
+
frac14: "one fourth",
|
46
|
+
frac12: "half",
|
47
|
+
frac34: "three fourths",
|
48
|
+
gt: ">",
|
49
|
+
lt: "<",
|
50
|
+
nbsp: " ",
|
51
|
+
pound: " pounds ",
|
52
|
+
reg: "(r)",
|
53
|
+
single_quote: "'",
|
54
|
+
times: "x",
|
55
|
+
trade: "(tm)",
|
56
|
+
yen: " yen "
|
56
57
|
}
|
57
58
|
|
58
59
|
TRANSLITERATIONS = {}
|
59
60
|
|
60
61
|
# Ordered by denominator then numerator of the value
|
61
62
|
VULGAR_FRACTIONS = {
|
62
|
-
:
|
63
|
-
:
|
64
|
-
:
|
65
|
-
:
|
66
|
-
:
|
67
|
-
:
|
68
|
-
:
|
69
|
-
:
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
73
|
-
:
|
74
|
-
:
|
75
|
-
:
|
76
|
-
:
|
63
|
+
half: "half",
|
64
|
+
one_third: "one third",
|
65
|
+
two_thirds: "two thirds",
|
66
|
+
one_fourth: "one fourth",
|
67
|
+
three_fourths: "three fourths",
|
68
|
+
one_fifth: "one fifth",
|
69
|
+
two_fifths: "two fifths",
|
70
|
+
three_fifths: "three fifths",
|
71
|
+
four_fifths: "four fifths",
|
72
|
+
one_sixth: "one sixth",
|
73
|
+
five_sixths: "five sixths",
|
74
|
+
one_eighth: "one eighth",
|
75
|
+
three_eighths: "three eighths",
|
76
|
+
five_eighths: "five eighths",
|
77
|
+
seven_eighths: "seven eighths",
|
77
78
|
}
|
78
79
|
|
79
80
|
class << self
|
@@ -45,7 +45,7 @@ module Stringex
|
|
45
45
|
return translation unless translation.nil?
|
46
46
|
|
47
47
|
if locale != default_locale
|
48
|
-
translate scope, key, options.merge(:
|
48
|
+
translate scope, key, options.merge(locale: default_locale)
|
49
49
|
else
|
50
50
|
default_conversion(scope, key) || options[:default]
|
51
51
|
end
|
@@ -83,7 +83,7 @@ module Stringex
|
|
83
83
|
|
84
84
|
def convert(string, options = {}, &block)
|
85
85
|
converter = Converter.new(string, options)
|
86
|
-
converter.instance_exec
|
86
|
+
converter.instance_exec(&block)
|
87
87
|
converter.smart_strip!
|
88
88
|
converter.string
|
89
89
|
end
|
@@ -3,7 +3,7 @@
|
|
3
3
|
module Stringex
|
4
4
|
module StringExtensions
|
5
5
|
def self.configure(&block)
|
6
|
-
Stringex::Configuration::StringExtensions.configure
|
6
|
+
Stringex::Configuration::StringExtensions.configure(&block)
|
7
7
|
end
|
8
8
|
|
9
9
|
def self.unconfigure!
|
@@ -52,7 +52,7 @@ module Stringex
|
|
52
52
|
# It allows localization of conversions so you can use it to convert characters into your own language.
|
53
53
|
# Example:
|
54
54
|
#
|
55
|
-
# I18n.backend.store_translations :de, { :
|
55
|
+
# I18n.backend.store_translations :de, { stringex: { characters: { and: "und" } } }
|
56
56
|
# I18n.locale = :de
|
57
57
|
# "ich & dich".convert_misc_characters # => "ich und dich"
|
58
58
|
#
|
@@ -93,11 +93,38 @@ module Stringex
|
|
93
93
|
end
|
94
94
|
end
|
95
95
|
|
96
|
+
def convert_unreadable_control_characters
|
97
|
+
stringex_convert do
|
98
|
+
translate! :unreadable_control_characters
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
96
102
|
# Returns the string limited in size to the value of limit.
|
97
|
-
def limit(limit = nil)
|
98
|
-
limit.nil?
|
103
|
+
def limit(limit = nil, truncate_words = true, whitespace_replacement_token = "-")
|
104
|
+
if limit.nil?
|
105
|
+
self
|
106
|
+
else
|
107
|
+
truncate_words == false ? self.whole_word_limit(limit, whitespace_replacement_token) : self[0...limit]
|
108
|
+
end
|
99
109
|
end
|
100
110
|
|
111
|
+
def whole_word_limit(limit, whitespace_replacement_token = "-")
|
112
|
+
whole_words = []
|
113
|
+
words = self.split(whitespace_replacement_token)
|
114
|
+
|
115
|
+
words.each do |word|
|
116
|
+
if word.size > limit
|
117
|
+
break
|
118
|
+
else
|
119
|
+
whole_words << word
|
120
|
+
limit -= (word.size + 1)
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
whole_words.join(whitespace_replacement_token)
|
125
|
+
end
|
126
|
+
|
127
|
+
|
101
128
|
# Performs multiple text manipulations. Essentially a shortcut for typing them all. View source
|
102
129
|
# below to see which methods are run.
|
103
130
|
def remove_formatting(options = {})
|
@@ -105,6 +132,7 @@ module Stringex
|
|
105
132
|
convert_smart_punctuation.
|
106
133
|
convert_accented_html_entities.
|
107
134
|
convert_vulgar_fractions.
|
135
|
+
convert_unreadable_control_characters.
|
108
136
|
convert_miscellaneous_html_entities.
|
109
137
|
convert_miscellaneous_characters(options).
|
110
138
|
to_ascii.
|
@@ -166,8 +194,8 @@ module Stringex
|
|
166
194
|
whitespace_replacement_token = options[:replace_whitespace_with]
|
167
195
|
dummy = remove_formatting(options).
|
168
196
|
replace_whitespace(whitespace_replacement_token).
|
169
|
-
collapse(
|
170
|
-
limit(options[:limit])
|
197
|
+
collapse(whitespace_replacement_token).
|
198
|
+
limit(options[:limit], options[:truncate_words], whitespace_replacement_token)
|
171
199
|
dummy.downcase! unless options[:force_downcase] == false
|
172
200
|
dummy
|
173
201
|
end
|
data/lib/stringex/unidecoder.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
3
|
require 'yaml'
|
4
|
+
require 'stringex/localization'
|
4
5
|
|
5
6
|
module Stringex
|
6
7
|
module Unidecoder
|
@@ -14,20 +15,7 @@ module Stringex
|
|
14
15
|
#
|
15
16
|
# You're probably better off just using the added String#to_ascii
|
16
17
|
def decode(string)
|
17
|
-
string.
|
18
|
-
if localized = translate(codepoint)
|
19
|
-
localized
|
20
|
-
else
|
21
|
-
begin
|
22
|
-
unpacked = codepoint.unpack("U")[0]
|
23
|
-
CODEPOINTS[code_group(unpacked)][grouped_point(unpacked)]
|
24
|
-
rescue
|
25
|
-
# Hopefully this won't come up much
|
26
|
-
# TODO: Make this note something to the user that is reportable to me perhaps
|
27
|
-
"?"
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
18
|
+
string.chars.map{|char| decoded(char)}.join
|
31
19
|
end
|
32
20
|
|
33
21
|
# Returns character for the given Unicode codepoint
|
@@ -49,8 +37,22 @@ module Stringex
|
|
49
37
|
|
50
38
|
private
|
51
39
|
|
52
|
-
def
|
53
|
-
|
40
|
+
def decoded(character)
|
41
|
+
localized(character) || from_yaml(character)
|
42
|
+
end
|
43
|
+
|
44
|
+
def localized(character)
|
45
|
+
Localization.translate(:transliterations, character)
|
46
|
+
end
|
47
|
+
|
48
|
+
def from_yaml(character)
|
49
|
+
return character unless character.ord > 128
|
50
|
+
unpacked = character.unpack("U")[0]
|
51
|
+
CODEPOINTS[code_group(unpacked)][grouped_point(unpacked)]
|
52
|
+
rescue
|
53
|
+
# Hopefully this won't come up much
|
54
|
+
# TODO: Make this note something to the user that is reportable to me perhaps
|
55
|
+
"?"
|
54
56
|
end
|
55
57
|
|
56
58
|
# Returns the Unicode codepoint grouping for the given character
|
@@ -67,12 +69,14 @@ module Stringex
|
|
67
69
|
end
|
68
70
|
|
69
71
|
module Stringex
|
70
|
-
module StringExtensions
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
72
|
+
module StringExtensions
|
73
|
+
module PublicInstanceMethods
|
74
|
+
# Returns string with its UTF-8 characters transliterated to ASCII ones. Example:
|
75
|
+
#
|
76
|
+
# "⠋⠗⠁⠝⠉⠑".to_ascii #=> "france"
|
77
|
+
def to_ascii
|
78
|
+
Stringex::Unidecoder.decode(self)
|
79
|
+
end
|
76
80
|
end
|
77
81
|
end
|
78
82
|
end
|