stringex 1.5.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +74 -0
- data/README.rdoc +22 -1
- data/Rakefile +46 -223
- data/VERSION +1 -0
- data/init.rb +1 -0
- data/lib/stringex.rb +11 -3
- data/lib/stringex/acts_as_url.rb +49 -97
- data/lib/stringex/acts_as_url/adapter.rb +26 -0
- data/lib/stringex/acts_as_url/adapter/active_record.rb +23 -0
- data/lib/stringex/acts_as_url/adapter/base.rb +188 -0
- data/lib/stringex/acts_as_url/adapter/data_mapper.rb +67 -0
- data/lib/stringex/acts_as_url/adapter/mongoid.rb +36 -0
- data/lib/stringex/configuration.rb +4 -0
- data/lib/stringex/configuration/acts_as_url.rb +44 -0
- data/lib/stringex/configuration/base.rb +58 -0
- data/lib/stringex/configuration/configurator.rb +25 -0
- data/lib/stringex/configuration/string_extensions.rb +19 -0
- data/lib/stringex/localization.rb +98 -0
- data/lib/stringex/localization/backend/i18n.rb +53 -0
- data/lib/stringex/localization/backend/internal.rb +51 -0
- data/lib/stringex/localization/conversion_expressions.rb +148 -0
- data/lib/stringex/localization/converter.rb +121 -0
- data/lib/stringex/localization/default_conversions.rb +88 -0
- data/lib/stringex/rails/railtie.rb +10 -0
- data/lib/stringex/string_extensions.rb +153 -208
- data/lib/stringex/unidecoder.rb +6 -101
- data/lib/stringex/unidecoder_data/x00.yml +1 -1
- data/lib/stringex/unidecoder_data/x02.yml +5 -5
- data/lib/stringex/unidecoder_data/x05.yml +1 -1
- data/lib/stringex/unidecoder_data/x06.yml +1 -1
- data/lib/stringex/unidecoder_data/x07.yml +3 -3
- data/lib/stringex/unidecoder_data/x09.yml +1 -1
- data/lib/stringex/unidecoder_data/x0e.yml +2 -2
- data/lib/stringex/unidecoder_data/x1f.yml +2 -2
- data/lib/stringex/unidecoder_data/x20.yml +1 -1
- data/lib/stringex/unidecoder_data/xfb.yml +1 -1
- data/lib/stringex/unidecoder_data/xff.yml +1 -1
- data/lib/stringex/version.rb +8 -0
- data/locales/da.yml +73 -0
- data/locales/en.yml +66 -0
- data/stringex.gemspec +77 -18
- data/test/acts_as_url/adapter/active_record.rb +72 -0
- data/test/acts_as_url/adapter/data_mapper.rb +82 -0
- data/test/acts_as_url/adapter/mongoid.rb +73 -0
- data/test/acts_as_url_configuration_test.rb +51 -0
- data/test/acts_as_url_integration_test.rb +271 -0
- data/test/localization/da_test.rb +117 -0
- data/test/localization/default_test.rb +113 -0
- data/test/localization/en_test.rb +117 -0
- data/test/localization_test.rb +123 -0
- data/test/redcloth_to_html_test.rb +37 -0
- data/test/string_extensions_test.rb +59 -91
- data/test/test_helper.rb +2 -0
- data/test/unicode_point_suite/basic_greek_test.rb +113 -0
- data/test/unicode_point_suite/basic_latin_test.rb +142 -0
- data/test/unicode_point_suite/codepoint_test_helper.rb +32 -0
- data/test/unidecoder/bad_localization.yml +1 -0
- data/test/unidecoder/localization.yml +4 -0
- data/test/unidecoder_test.rb +3 -5
- metadata +145 -37
- data/test/acts_as_url_test.rb +0 -272
@@ -0,0 +1,148 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Stringex
|
4
|
+
module Localization
|
5
|
+
module ConversionExpressions
|
6
|
+
ABBREVIATION = /(\s|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|$))/
|
7
|
+
|
8
|
+
ACCENTED_HTML_ENTITY = /&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/
|
9
|
+
|
10
|
+
APOSTROPHE = /(^|[[:alpha:]])'|`([[:alpha:]]|$)/
|
11
|
+
|
12
|
+
CHARACTERS = {
|
13
|
+
:and => /\s*&\s*/,
|
14
|
+
:at => /\s*@\s*/,
|
15
|
+
:degrees => /\s*°\s*/,
|
16
|
+
:divide => /\s*÷\s*/,
|
17
|
+
:dot => /(\S|^)\.(\S)/,
|
18
|
+
:ellipsis => /\s*\.{3,}\s*/,
|
19
|
+
:equals => /\s*=\s*/,
|
20
|
+
:number => /\s*#/,
|
21
|
+
:percent => /\s*%\s*/,
|
22
|
+
:plus => /\s*\+\s*/,
|
23
|
+
:slash => /\s*(\\|\/|/)\s*/,
|
24
|
+
:star => /\s*\*\s*/,
|
25
|
+
}
|
26
|
+
|
27
|
+
# Things that just get converted to spaces
|
28
|
+
CLEANUP_CHARACTERS = /[\.,:;(){}\[\]\/\?!\^'ʼ"_\|]/
|
29
|
+
CLEANUP_HTML_ENTITIES = /&[^;]+;/
|
30
|
+
|
31
|
+
CURRENCIES_SUPPORTED_SIMPLE = {
|
32
|
+
:dollars => /\$/,
|
33
|
+
:euros => /€/,
|
34
|
+
:pounds => /£/,
|
35
|
+
:yen => /¥/,
|
36
|
+
}
|
37
|
+
CURRENCIES_SUPPORTED_COMPLEX = {
|
38
|
+
:dollars => :dollars_cents,
|
39
|
+
:euros => :euros_cents,
|
40
|
+
:pounds => :pounds_pence,
|
41
|
+
}
|
42
|
+
CURRENCIES_SUPPORTED = Regexp.new(CURRENCIES_SUPPORTED_SIMPLE.values.join('|'))
|
43
|
+
CURRENCIES_SIMPLE = CURRENCIES_SUPPORTED_SIMPLE.inject({}) do |hash, content|
|
44
|
+
key, expression = content
|
45
|
+
hash[key] = /(?:\s|^)#{expression}(\d*)(?:\s|$)/
|
46
|
+
hash
|
47
|
+
end
|
48
|
+
CURRENCIES_COMPLEX = CURRENCIES_SUPPORTED_SIMPLE.inject({}) do |hash, content|
|
49
|
+
key, expression = content
|
50
|
+
# Do we really need to not worry about complex currencies if there are none for the currency?
|
51
|
+
complex_key = CURRENCIES_SUPPORTED_COMPLEX[key]
|
52
|
+
if complex_key
|
53
|
+
hash[complex_key] = /(?:\s|^)#{expression}(\d+)\.(\d+)(?:\s|$)/
|
54
|
+
end
|
55
|
+
hash
|
56
|
+
end
|
57
|
+
CURRENCIES = CURRENCIES_SIMPLE.merge(CURRENCIES_COMPLEX)
|
58
|
+
|
59
|
+
HTML_ENTITIES = Proc.new(){
|
60
|
+
base = {
|
61
|
+
:amp => %w{#38 amp},
|
62
|
+
:cent => %w{#162 cent},
|
63
|
+
:copy => %w{#169 copy},
|
64
|
+
:deg => %w{#176 deg},
|
65
|
+
:divide => %w{#247 divide},
|
66
|
+
:double_quote => %w{#34 #822[012] quot ldquo rdquo dbquo},
|
67
|
+
:ellipsis => %w{#8230 hellip},
|
68
|
+
:en_dash => %w{#8211 ndash},
|
69
|
+
:em_dash => %w{#8212 mdash},
|
70
|
+
:frac14 => %w{#188 frac14},
|
71
|
+
:frac12 => %w{#189 frac12},
|
72
|
+
:frac34 => %w{#190 frac34},
|
73
|
+
:gt => %w{#62 gt},
|
74
|
+
:lt => %w{#60 lt},
|
75
|
+
:nbsp => %w{#160 nbsp},
|
76
|
+
:pound => %w{#163 pound},
|
77
|
+
:reg => %w{#174 reg},
|
78
|
+
:single_quote => %w{#39 #821[678] apos lsquo rsquo sbquo},
|
79
|
+
:times => %w{#215 times},
|
80
|
+
:trade => %w{#8482 trade},
|
81
|
+
:yen => %w{#165 yen},
|
82
|
+
}
|
83
|
+
base.inject({}) do |hash, content|
|
84
|
+
key, expression = content
|
85
|
+
hash[key] = /&(#{expression.join('|')});/
|
86
|
+
hash
|
87
|
+
end
|
88
|
+
}.call
|
89
|
+
|
90
|
+
HTML_TAG = Proc.new(){
|
91
|
+
name = /[\w:_-]+/
|
92
|
+
value = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
|
93
|
+
attr = /(#{name}(\s*=\s*#{value})?)/
|
94
|
+
/<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
|
95
|
+
}.call
|
96
|
+
|
97
|
+
SMART_PUNCTUATION = {
|
98
|
+
/(“|”|\302\223|\302\224|\303\222|\303\223)/ => '"',
|
99
|
+
/(‘|’|\302\221|\302\222|\303\225)/ => "'",
|
100
|
+
/…/ => "...",
|
101
|
+
}
|
102
|
+
|
103
|
+
# Ordered by denominator then numerator of the value
|
104
|
+
VULGAR_FRACTIONS = {
|
105
|
+
:half => /(½|½|½)/,
|
106
|
+
:one_third => /(⅓|⅓)/,
|
107
|
+
:two_thirds => /(⅔|⅔)/,
|
108
|
+
:one_fourth => /(¼|¼|¼)/,
|
109
|
+
:three_fourths => /(¾|¾|¾)/,
|
110
|
+
:one_fifth => /(⅕|⅕)/,
|
111
|
+
:two_fifths => /(⅖|⅖)/,
|
112
|
+
:three_fifths => /(⅗|⅗)/,
|
113
|
+
:four_fifths => /(⅘|⅘)/,
|
114
|
+
:one_sixth => /(⅙|⅙)/,
|
115
|
+
:five_sixths => /(⅚|⅚)/,
|
116
|
+
:one_eighth => /(⅛|⅛)/,
|
117
|
+
:three_eighths => /(⅜|⅜)/,
|
118
|
+
:five_eighths => /(⅝|⅝)/,
|
119
|
+
:seven_eighths => /(⅞|⅞)/,
|
120
|
+
}
|
121
|
+
|
122
|
+
WHITESPACE = /\s+/
|
123
|
+
|
124
|
+
class << self
|
125
|
+
%w{
|
126
|
+
abbreviation
|
127
|
+
accented_html_entity
|
128
|
+
apostrophe
|
129
|
+
characters
|
130
|
+
cleanup_characters
|
131
|
+
cleanup_html_entities
|
132
|
+
currencies
|
133
|
+
currencies_simple
|
134
|
+
currencies_complex
|
135
|
+
html_entities
|
136
|
+
html_tag
|
137
|
+
smart_punctuation
|
138
|
+
vulgar_fractions
|
139
|
+
whitespace
|
140
|
+
}.each do |conversion_type|
|
141
|
+
define_method conversion_type do
|
142
|
+
const_get conversion_type.upcase
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'stringex/localization/conversion_expressions'
|
4
|
+
|
5
|
+
module Stringex
|
6
|
+
module Localization
|
7
|
+
class Converter
|
8
|
+
include ConversionExpressions
|
9
|
+
|
10
|
+
attr_reader :ending_whitespace, :options, :starting_whitespace, :string
|
11
|
+
|
12
|
+
def initialize(string, options = {})
|
13
|
+
@string = string.dup
|
14
|
+
@options = Stringex::Configuration::StringExtensions.default_settings.merge(options)
|
15
|
+
string =~ /^(\s+)/
|
16
|
+
@starting_whitespace = $1 unless $1 == ''
|
17
|
+
string =~ /(\s+)$/
|
18
|
+
@ending_whitespace = $1 unless $1 == ''
|
19
|
+
end
|
20
|
+
|
21
|
+
def cleanup_accented_html_entities!
|
22
|
+
string.gsub! expressions.accented_html_entity, '\1'
|
23
|
+
end
|
24
|
+
|
25
|
+
def cleanup_characters!
|
26
|
+
string.gsub! expressions.cleanup_characters, ' '
|
27
|
+
end
|
28
|
+
|
29
|
+
def cleanup_html_entities!
|
30
|
+
string.gsub! expressions.cleanup_html_entities, ''
|
31
|
+
end
|
32
|
+
|
33
|
+
def cleanup_smart_punctuation!
|
34
|
+
expressions.smart_punctuation.each do |expression, replacement|
|
35
|
+
string.gsub! expression, replacement
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def smart_strip!
|
40
|
+
string.strip!
|
41
|
+
@string = "#{starting_whitespace}#{string}#{ending_whitespace}"
|
42
|
+
end
|
43
|
+
|
44
|
+
def strip!
|
45
|
+
string.strip!
|
46
|
+
end
|
47
|
+
|
48
|
+
def strip_html_tags!
|
49
|
+
string.gsub! expressions.html_tag, ''
|
50
|
+
end
|
51
|
+
|
52
|
+
def translate!(*conversions)
|
53
|
+
conversions.each do |conversion|
|
54
|
+
send conversion
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
protected
|
59
|
+
|
60
|
+
def abbreviations
|
61
|
+
string.gsub! expressions.abbreviation do |x|
|
62
|
+
x.gsub '.', ''
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def apostrophes
|
67
|
+
string.gsub! expressions.apostrophe, '\1\2'
|
68
|
+
end
|
69
|
+
|
70
|
+
def characters
|
71
|
+
expressions.characters.each do |key, expression|
|
72
|
+
next if key == :slash && options[:allow_slash]
|
73
|
+
replacement = translate(key)
|
74
|
+
replacement = " #{replacement} " unless key == :dot
|
75
|
+
string.gsub! expression, replacement
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def currencies
|
80
|
+
if has_currencies?
|
81
|
+
[:currencies_complex, :currencies_simple].each do |type|
|
82
|
+
expressions.send(type).each do |key, expression|
|
83
|
+
string.gsub! expression, " #{translate(key, :currencies)} "
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
def ellipses
|
90
|
+
string.gsub! expressions.characters[:ellipsis], " #{translate(:ellipsis)} "
|
91
|
+
end
|
92
|
+
|
93
|
+
def html_entities
|
94
|
+
expressions.html_entities.each do |key, expression|
|
95
|
+
string.gsub! expression, translate(key, :html_entities)
|
96
|
+
end
|
97
|
+
string.squeeze! ' '
|
98
|
+
end
|
99
|
+
|
100
|
+
def vulgar_fractions
|
101
|
+
expressions.vulgar_fractions.each do |key, expression|
|
102
|
+
string.gsub! expression, translate(key, :vulgar_fractions)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
private
|
107
|
+
|
108
|
+
def expressions
|
109
|
+
ConversionExpressions
|
110
|
+
end
|
111
|
+
|
112
|
+
def has_currencies?
|
113
|
+
string =~ CURRENCIES_SUPPORTED
|
114
|
+
end
|
115
|
+
|
116
|
+
def translate(key, scope = :characters)
|
117
|
+
Localization.translate scope, key
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Stringex
|
4
|
+
module Localization
|
5
|
+
module DefaultConversions
|
6
|
+
CHARACTERS = {
|
7
|
+
:and => "and",
|
8
|
+
:at => "at",
|
9
|
+
:degrees => "degrees",
|
10
|
+
:divide => "divided by",
|
11
|
+
:dot => '\1 dot \2',
|
12
|
+
:ellipsis => "dot dot dot",
|
13
|
+
:equals => "equals",
|
14
|
+
:number => "number",
|
15
|
+
:percent => "percent",
|
16
|
+
:plus => "plus",
|
17
|
+
:slash => "slash",
|
18
|
+
:star => "star",
|
19
|
+
}
|
20
|
+
|
21
|
+
CURRENCIES_SIMPLE = {
|
22
|
+
:dollars => '\1 dollars',
|
23
|
+
:euros => '\1 euros',
|
24
|
+
:pounds => '\1 pounds',
|
25
|
+
:yen => '\1 yen',
|
26
|
+
}
|
27
|
+
CURRENCIES_COMPLEX = {
|
28
|
+
:dollars_cents => '\1 dollars \2 cents',
|
29
|
+
:euros_cents => '\1 euros \2 cents',
|
30
|
+
:pounds_pence => '\1 pounds \2 pence',
|
31
|
+
}
|
32
|
+
CURRENCIES = CURRENCIES_SIMPLE.merge(CURRENCIES_COMPLEX)
|
33
|
+
|
34
|
+
HTML_ENTITIES = {
|
35
|
+
:amp => "and",
|
36
|
+
:cent => " cents",
|
37
|
+
:copy => "(c)",
|
38
|
+
:deg => " degrees ",
|
39
|
+
:divide => " divided by ",
|
40
|
+
:double_quote => '"',
|
41
|
+
:ellipsis => "...",
|
42
|
+
:en_dash => "-",
|
43
|
+
:em_dash => "--",
|
44
|
+
:frac14 => "one fourth",
|
45
|
+
:frac12 => "half",
|
46
|
+
:frac34 => "three fourths",
|
47
|
+
:gt => ">",
|
48
|
+
:lt => "<",
|
49
|
+
:nbsp => " ",
|
50
|
+
:pound => " pounds ",
|
51
|
+
:reg => "(r)",
|
52
|
+
:single_quote => "'",
|
53
|
+
:times => "x",
|
54
|
+
:trade => "(tm)",
|
55
|
+
:yen => " yen "
|
56
|
+
}
|
57
|
+
|
58
|
+
TRANSLITERATIONS = {}
|
59
|
+
|
60
|
+
# Ordered by denominator then numerator of the value
|
61
|
+
VULGAR_FRACTIONS = {
|
62
|
+
:half => "half",
|
63
|
+
:one_third => "one third",
|
64
|
+
:two_thirds => "two thirds",
|
65
|
+
:one_fourth => "one fourth",
|
66
|
+
:three_fourths => "three fourths",
|
67
|
+
:one_fifth => "one fifth",
|
68
|
+
:two_fifths => "two fifths",
|
69
|
+
:three_fifths => "three fifths",
|
70
|
+
:four_fifths => "four fifths",
|
71
|
+
:one_sixth => "one sixth",
|
72
|
+
:five_sixths => "five sixths",
|
73
|
+
:one_eighth => "one eighth",
|
74
|
+
:three_eighths => "three eighths",
|
75
|
+
:five_eighths => "five eighths",
|
76
|
+
:seven_eighths => "seven eighths",
|
77
|
+
}
|
78
|
+
|
79
|
+
class << self
|
80
|
+
%w{characters currencies html_entities transliterations vulgar_fractions}.each do |conversion_type|
|
81
|
+
define_method conversion_type do
|
82
|
+
const_get conversion_type.upcase
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
module Stringex
|
2
|
+
class Railtie < ::Rails::Railtie #:nodoc:
|
3
|
+
initializer "stringex" do |app|
|
4
|
+
locales = app.config.i18n.available_locales
|
5
|
+
pattern = locales.blank? ? "*" : "{#{locales.join(',')}}"
|
6
|
+
files = Dir[File.join(Stringex::Localization::Backend::I18n::LOAD_PATH_BASE, "#{pattern}.yml")]
|
7
|
+
I18n.load_path.concat(files)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
@@ -1,236 +1,181 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
3
|
module Stringex
|
4
|
-
# These methods are all added on String class.
|
5
4
|
module StringExtensions
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
# These methods are all included into the String class.
|
6
|
+
module PublicInstanceMethods
|
7
|
+
# Removes specified character from the beginning and/or end of the string and then performs
|
8
|
+
# <tt>String#squeeze(character)</tt>, condensing runs of the character within the string.
|
9
|
+
#
|
10
|
+
# Note: This method has been superseded by ActiveSupport's squish method.
|
11
|
+
def collapse(character = " ")
|
12
|
+
sub(/^#{character}*/, "").sub(/#{character}*$/, "").squeeze(character)
|
13
|
+
end
|
9
14
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
RedCloth.new(self).to_html.tr("\t", "")
|
25
|
-
else
|
26
|
-
RedCloth.new(self).to_html.tr("\t", "").gsub(/\n\n/, "")
|
27
|
-
end
|
15
|
+
# Converts HTML entities into the respective non-accented letters. Examples:
|
16
|
+
#
|
17
|
+
# "&aacute;".convert_accented_entities # => "a"
|
18
|
+
# "&ccedil;".convert_accented_entities # => "c"
|
19
|
+
# "&egrave;".convert_accented_entities # => "e"
|
20
|
+
# "&icirc;".convert_accented_entities # => "i"
|
21
|
+
# "&oslash;".convert_accented_entities # => "o"
|
22
|
+
# "&uuml;".convert_accented_entities # => "u"
|
23
|
+
#
|
24
|
+
# Note: This does not do any conversion of Unicode/ASCII accented-characters. For that
|
25
|
+
# functionality please use <tt>to_ascii</tt>.
|
26
|
+
def convert_accented_html_entities
|
27
|
+
stringex_convert do
|
28
|
+
cleanup_accented_html_entities!
|
28
29
|
end
|
29
|
-
else
|
30
|
-
warn "String#to_html was called without RedCloth being successfully required"
|
31
|
-
self
|
32
30
|
end
|
33
|
-
end
|
34
31
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
32
|
+
# Converts various common plaintext characters to a more URI-friendly representation.
|
33
|
+
# Examples:
|
34
|
+
#
|
35
|
+
# "foo & bar".convert_misc_characters # => "foo and bar"
|
36
|
+
# "Chanel #9".convert_misc_characters # => "Chanel number nine"
|
37
|
+
# "user@host".convert_misc_characters # => "user at host"
|
38
|
+
# "google.com".convert_misc_characters # => "google dot com"
|
39
|
+
# "$10".convert_misc_characters # => "10 dollars"
|
40
|
+
# "*69".convert_misc_characters # => "star 69"
|
41
|
+
# "100%".convert_misc_characters # => "100 percent"
|
42
|
+
# "windows/mac/linux".convert_misc_characters # => "windows slash mac slash linux"
|
43
|
+
#
|
44
|
+
# It allows localization of conversions so you can use it to convert characters into your own language.
|
45
|
+
# Example:
|
46
|
+
#
|
47
|
+
# I18n.backend.store_translations :de, { :stringex => { :characters => { :and => "und" } } }
|
48
|
+
# I18n.locale = :de
|
49
|
+
# "ich & dich".convert_misc_characters # => "ich und dich"
|
50
|
+
#
|
51
|
+
# Note: Because this method will convert any & symbols to the string "and",
|
52
|
+
# you should run any methods which convert HTML entities (convert_accented_html_entities and convert_miscellaneous_html_entities)
|
53
|
+
# before running this method.
|
54
|
+
def convert_miscellaneous_characters(options = {})
|
55
|
+
stringex_convert do
|
56
|
+
translate! :ellipses, :currencies, :abbreviations, :characters, :apostrophes
|
57
|
+
cleanup_characters!
|
58
|
+
end
|
59
|
+
end
|
42
60
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
61
|
+
# Converts HTML entities (taken from common Textile/RedCloth formattings) into plain text formats.
|
62
|
+
#
|
63
|
+
# Note: This isn't an attempt at complete conversion of HTML entities, just those most likely
|
64
|
+
# to be generated by Textile.
|
65
|
+
def convert_miscellaneous_html_entities
|
66
|
+
stringex_convert do
|
67
|
+
translate! :html_entities
|
68
|
+
cleanup_html_entities!
|
69
|
+
end
|
70
|
+
end
|
47
71
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
convert_misc_entities.
|
56
|
-
convert_misc_characters(options).
|
57
|
-
to_ascii.
|
58
|
-
# NOTE: String#to_ascii may convert some Unicode characters to ascii we'd already transliterated
|
59
|
-
# so we need to do it again just to be safe
|
60
|
-
convert_misc_characters(options).
|
61
|
-
collapse
|
62
|
-
end
|
72
|
+
# Converts MS Word 'smart punctuation' to ASCII
|
73
|
+
#
|
74
|
+
def convert_smart_punctuation
|
75
|
+
stringex_convert do
|
76
|
+
cleanup_smart_punctuation!
|
77
|
+
end
|
78
|
+
end
|
63
79
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
rx = /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
|
71
|
-
(leave_whitespace) ? gsub(rx, "").strip : gsub(rx, "").gsub(/\s+/, " ").strip
|
72
|
-
end
|
73
|
-
# Converts HTML entities into the respective non-accented letters. Examples:
|
74
|
-
#
|
75
|
-
# "&aacute;".convert_accented_entities # => "a"
|
76
|
-
# "&ccedil;".convert_accented_entities # => "c"
|
77
|
-
# "&egrave;".convert_accented_entities # => "e"
|
78
|
-
# "&icirc;".convert_accented_entities # => "i"
|
79
|
-
# "&oslash;".convert_accented_entities # => "o"
|
80
|
-
# "&uuml;".convert_accented_entities # => "u"
|
81
|
-
#
|
82
|
-
# Note: This does not do any conversion of Unicode/ASCII accented-characters. For that
|
83
|
-
# functionality please use <tt>to_ascii</tt>.
|
84
|
-
def convert_accented_entities
|
85
|
-
gsub(/&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/, '\1').strip
|
86
|
-
end
|
80
|
+
# Converts vulgar fractions from supported HTML entities and Unicode to plain text formats.
|
81
|
+
def convert_vulgar_fractions
|
82
|
+
stringex_convert do
|
83
|
+
translate! :vulgar_fractions
|
84
|
+
end
|
85
|
+
end
|
87
86
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
# to be generated by Textile.
|
92
|
-
def convert_misc_entities
|
93
|
-
dummy = dup
|
94
|
-
{
|
95
|
-
"#822[01]" => "\"",
|
96
|
-
"#821[67]" => "'",
|
97
|
-
"#8230" => "...",
|
98
|
-
"#8211" => "-",
|
99
|
-
"#8212" => "--",
|
100
|
-
"#215" => "x",
|
101
|
-
"gt" => ">",
|
102
|
-
"lt" => "<",
|
103
|
-
"(#8482|trade)" => "(tm)",
|
104
|
-
"(#174|reg)" => "(r)",
|
105
|
-
"(#169|copy)" => "(c)",
|
106
|
-
"(#38|amp)" => "and",
|
107
|
-
"nbsp" => " ",
|
108
|
-
"(#162|cent)" => " cent",
|
109
|
-
"(#163|pound)" => " pound",
|
110
|
-
"(#188|frac14)" => "one fourth",
|
111
|
-
"(#189|frac12)" => "half",
|
112
|
-
"(#190|frac34)" => "three fourths",
|
113
|
-
"(#247|divide)" => "divide",
|
114
|
-
"(#176|deg)" => " degrees "
|
115
|
-
}.each do |textiled, normal|
|
116
|
-
dummy.gsub!(/&#{textiled};/, normal)
|
87
|
+
# Returns the string limited in size to the value of limit.
|
88
|
+
def limit(limit = nil)
|
89
|
+
limit.nil? ? self : self[0...limit]
|
117
90
|
end
|
118
|
-
dummy.gsub(/&[^;]+;/, "").strip
|
119
|
-
end
|
120
91
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
"(⅙|⅙)" => "one sixth",
|
136
|
-
"(⅚|⅚)" => "five sixths",
|
137
|
-
"(⅛|⅛)" => "one eighth",
|
138
|
-
"(⅜|⅜)" => "three eighths",
|
139
|
-
"(⅝|⅝)" => "five eighths",
|
140
|
-
"(⅞|⅞)" => "seven eighths"
|
141
|
-
}.each do |textiled, normal|
|
142
|
-
dummy.gsub!(/#{textiled}/, normal)
|
92
|
+
# Performs multiple text manipulations. Essentially a shortcut for typing them all. View source
|
93
|
+
# below to see which methods are run.
|
94
|
+
def remove_formatting(options = {})
|
95
|
+
strip_html_tags.
|
96
|
+
convert_smart_punctuation.
|
97
|
+
convert_accented_html_entities.
|
98
|
+
convert_vulgar_fractions.
|
99
|
+
convert_miscellaneous_html_entities.
|
100
|
+
convert_miscellaneous_characters(options).
|
101
|
+
to_ascii.
|
102
|
+
# NOTE: String#to_ascii may convert some Unicode characters to ascii we'd already transliterated
|
103
|
+
# so we need to do it again just to be safe
|
104
|
+
convert_miscellaneous_characters(options).
|
105
|
+
collapse
|
143
106
|
end
|
144
|
-
dummy
|
145
|
-
end
|
146
107
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
"(‘|’|\302\221|\302\222|\303\225)" => "'",
|
155
|
-
"…" => "...",
|
156
|
-
}.each do |smart, normal|
|
157
|
-
dummy.gsub!(/#{smart}/, normal)
|
108
|
+
# Replace runs of whitespace in string. Defaults to a single space but any replacement
|
109
|
+
# string may be specified as an argument. Examples:
|
110
|
+
#
|
111
|
+
# "Foo bar".replace_whitespace # => "Foo bar"
|
112
|
+
# "Foo bar".replace_whitespace("-") # => "Foo-bar"
|
113
|
+
def replace_whitespace(replacement = " ")
|
114
|
+
gsub(/\s+/, replacement)
|
158
115
|
end
|
159
|
-
dummy.strip
|
160
|
-
end
|
161
116
|
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
# "$10".convert_misc_characters # => "10 dollars"
|
170
|
-
# "*69".convert_misc_characters # => "star 69"
|
171
|
-
# "100%".convert_misc_characters # => "100 percent"
|
172
|
-
# "windows/mac/linux".convert_misc_characters # => "windows slash mac slash linux"
|
173
|
-
#
|
174
|
-
# Note: Because this method will convert any & symbols to the string "and",
|
175
|
-
# you should run any methods which convert HTML entities (convert_html_entities and convert_misc_entities)
|
176
|
-
# before running this method.
|
177
|
-
def convert_misc_characters(options = {})
|
178
|
-
dummy = dup.gsub(/\.{3,}/, " dot dot dot ") # Catch ellipses before single dot rule!
|
179
|
-
# Special rules for money
|
180
|
-
{
|
181
|
-
/(\s|^)\$(\d+)\.(\d+)(\s|$)/ => '\2 dollars \3 cents',
|
182
|
-
/(\s|^)£(\d+)\.(\d+)(\s|$)/u => '\2 pounds \3 pence',
|
183
|
-
}.each do |found, replaced|
|
184
|
-
replaced = " #{replaced} " unless replaced =~ /\\1/
|
185
|
-
dummy.gsub!(found, replaced)
|
117
|
+
# Removes HTML tags from text.
|
118
|
+
# NOTE: This code is simplified from Tobias Luettke's regular expression in Typo[http://typosphere.org].
|
119
|
+
def strip_html_tags(leave_whitespace = false)
|
120
|
+
string = stringex_convert do
|
121
|
+
strip_html_tags!
|
122
|
+
end
|
123
|
+
leave_whitespace ? string : string.replace_whitespace(' ')
|
186
124
|
end
|
187
|
-
|
188
|
-
|
189
|
-
|
125
|
+
|
126
|
+
# Returns the string converted (via Textile/RedCloth) to HTML format
|
127
|
+
# or self [with a friendly warning] if RedCloth is not available.
|
128
|
+
#
|
129
|
+
# Using <tt>:lite</tt> argument will cause RedCloth to not wrap the HTML in a container
|
130
|
+
# P element, which is useful behavior for generating header element text, etc.
|
131
|
+
# This is roughly equivalent to ActionView's <tt>textilize_without_paragraph</tt>
|
132
|
+
# except that it makes RedCloth do all the work instead of just gsubbing the return
|
133
|
+
# from RedCloth.
|
134
|
+
def to_html(lite_mode = false)
|
135
|
+
if defined?(RedCloth)
|
136
|
+
if lite_mode
|
137
|
+
RedCloth.new(self, [:lite_mode]).to_html
|
138
|
+
else
|
139
|
+
if self =~ /<pre>/
|
140
|
+
RedCloth.new(self).to_html.tr("\t", "")
|
141
|
+
else
|
142
|
+
RedCloth.new(self).to_html.tr("\t", "").gsub(/\n\n/, "")
|
143
|
+
end
|
144
|
+
end
|
145
|
+
else
|
146
|
+
warn "String#to_html was called without RedCloth being successfully required"
|
147
|
+
self
|
148
|
+
end
|
190
149
|
end
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
/\s*÷\s*/ => "divide",
|
206
|
-
/\s*°\s*/ => "degrees"
|
207
|
-
}
|
208
|
-
misc_characters[/\s*(\\|\/|/)\s*/] = 'slash' unless options[:allow_slash]
|
209
|
-
misc_characters.each do |found, replaced|
|
210
|
-
replaced = " #{replaced} " unless replaced =~ /\\1/
|
211
|
-
dummy.gsub!(found, replaced)
|
150
|
+
|
151
|
+
# Create a URI-friendly representation of the string. This is used internally by
|
152
|
+
# acts_as_url[link:classes/Stringex/ActsAsUrl/ClassMethods.html#M000012]
|
153
|
+
# but can be called manually in order to generate a URI-friendly version of any string.
|
154
|
+
def to_url(options = {})
|
155
|
+
return self if options[:exclude] && options[:exclude].include?(self)
|
156
|
+
options = stringex_default_options.merge(options)
|
157
|
+
whitespace_replacement_token = options[:replace_whitespace_with]
|
158
|
+
dummy = remove_formatting(options).
|
159
|
+
replace_whitespace(whitespace_replacement_token).
|
160
|
+
collapse("-").
|
161
|
+
limit(options[:limit])
|
162
|
+
dummy.downcase! unless options[:force_downcase] == false
|
163
|
+
dummy
|
212
164
|
end
|
213
|
-
dummy = dummy.gsub(/(^|[[:alpha:]])'|`([[:alpha:]]|$)/, '\1\2').gsub(/[\.,:;()\[\]\/\?!\^'ʼ"_\|]/, " ").strip
|
214
|
-
end
|
215
165
|
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
def replace_whitespace(replace = " ")
|
222
|
-
gsub(/\s+/, replace)
|
223
|
-
end
|
166
|
+
private
|
167
|
+
|
168
|
+
def stringex_convert(options = {}, &block)
|
169
|
+
Localization.convert self, options, &block
|
170
|
+
end
|
224
171
|
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
# Note: This method has been superceded by ActiveSupport's squish method.
|
229
|
-
def collapse(character = " ")
|
230
|
-
sub(/^#{character}*/, "").sub(/#{character}*$/, "").squeeze(character)
|
172
|
+
def stringex_default_options
|
173
|
+
Stringex::Configuration::StringExtensions.default_settings
|
174
|
+
end
|
231
175
|
end
|
232
176
|
|
233
|
-
|
177
|
+
# These methods are extended onto the String class itself.
|
178
|
+
module PublicClassMethods
|
234
179
|
# Returns string of random characters with a length matching the specified limit. Excludes 0
|
235
180
|
# to avoid confusion between 0 and O.
|
236
181
|
def random(limit)
|