stringex 1.5.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +15 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +74 -0
  4. data/README.rdoc +22 -1
  5. data/Rakefile +46 -223
  6. data/VERSION +1 -0
  7. data/init.rb +1 -0
  8. data/lib/stringex.rb +11 -3
  9. data/lib/stringex/acts_as_url.rb +49 -97
  10. data/lib/stringex/acts_as_url/adapter.rb +26 -0
  11. data/lib/stringex/acts_as_url/adapter/active_record.rb +23 -0
  12. data/lib/stringex/acts_as_url/adapter/base.rb +188 -0
  13. data/lib/stringex/acts_as_url/adapter/data_mapper.rb +67 -0
  14. data/lib/stringex/acts_as_url/adapter/mongoid.rb +36 -0
  15. data/lib/stringex/configuration.rb +4 -0
  16. data/lib/stringex/configuration/acts_as_url.rb +44 -0
  17. data/lib/stringex/configuration/base.rb +58 -0
  18. data/lib/stringex/configuration/configurator.rb +25 -0
  19. data/lib/stringex/configuration/string_extensions.rb +19 -0
  20. data/lib/stringex/localization.rb +98 -0
  21. data/lib/stringex/localization/backend/i18n.rb +53 -0
  22. data/lib/stringex/localization/backend/internal.rb +51 -0
  23. data/lib/stringex/localization/conversion_expressions.rb +148 -0
  24. data/lib/stringex/localization/converter.rb +121 -0
  25. data/lib/stringex/localization/default_conversions.rb +88 -0
  26. data/lib/stringex/rails/railtie.rb +10 -0
  27. data/lib/stringex/string_extensions.rb +153 -208
  28. data/lib/stringex/unidecoder.rb +6 -101
  29. data/lib/stringex/unidecoder_data/x00.yml +1 -1
  30. data/lib/stringex/unidecoder_data/x02.yml +5 -5
  31. data/lib/stringex/unidecoder_data/x05.yml +1 -1
  32. data/lib/stringex/unidecoder_data/x06.yml +1 -1
  33. data/lib/stringex/unidecoder_data/x07.yml +3 -3
  34. data/lib/stringex/unidecoder_data/x09.yml +1 -1
  35. data/lib/stringex/unidecoder_data/x0e.yml +2 -2
  36. data/lib/stringex/unidecoder_data/x1f.yml +2 -2
  37. data/lib/stringex/unidecoder_data/x20.yml +1 -1
  38. data/lib/stringex/unidecoder_data/xfb.yml +1 -1
  39. data/lib/stringex/unidecoder_data/xff.yml +1 -1
  40. data/lib/stringex/version.rb +8 -0
  41. data/locales/da.yml +73 -0
  42. data/locales/en.yml +66 -0
  43. data/stringex.gemspec +77 -18
  44. data/test/acts_as_url/adapter/active_record.rb +72 -0
  45. data/test/acts_as_url/adapter/data_mapper.rb +82 -0
  46. data/test/acts_as_url/adapter/mongoid.rb +73 -0
  47. data/test/acts_as_url_configuration_test.rb +51 -0
  48. data/test/acts_as_url_integration_test.rb +271 -0
  49. data/test/localization/da_test.rb +117 -0
  50. data/test/localization/default_test.rb +113 -0
  51. data/test/localization/en_test.rb +117 -0
  52. data/test/localization_test.rb +123 -0
  53. data/test/redcloth_to_html_test.rb +37 -0
  54. data/test/string_extensions_test.rb +59 -91
  55. data/test/test_helper.rb +2 -0
  56. data/test/unicode_point_suite/basic_greek_test.rb +113 -0
  57. data/test/unicode_point_suite/basic_latin_test.rb +142 -0
  58. data/test/unicode_point_suite/codepoint_test_helper.rb +32 -0
  59. data/test/unidecoder/bad_localization.yml +1 -0
  60. data/test/unidecoder/localization.yml +4 -0
  61. data/test/unidecoder_test.rb +3 -5
  62. metadata +145 -37
  63. data/test/acts_as_url_test.rb +0 -272
@@ -0,0 +1,148 @@
1
+ # encoding: UTF-8
2
+
3
+ module Stringex
4
+ module Localization
5
# Regular expressions that Stringex::Localization::Converter applies when
# turning punctuation, symbols, currency amounts, HTML entities and markup
# into plain text. Each constant named in the reader list at the bottom of
# the module is also exposed through a lowercase module-level method, e.g.
# ConversionExpressions.characters returns CHARACTERS.
module ConversionExpressions
  # Dotted abbreviations delimited by whitespace or line boundaries.
  ABBREVIATION = /(\s|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|$))/

  # Named entities for accented letters; group 1 is the base letter.
  ACCENTED_HTML_ENTITY = /&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/

  APOSTROPHE = /(^|[[:alpha:]])'|`([[:alpha:]]|$)/

  # Symbol => pattern. The hash order is the order in which
  # Converter#characters applies the replacements.
  CHARACTERS = {
    :and      => /\s*&\s*/,
    :at       => /\s*@\s*/,
    :degrees  => /\s*°\s*/,
    :divide   => /\s*÷\s*/,
    :dot      => /(\S|^)\.(\S)/,
    :ellipsis => /\s*\.{3,}\s*/,
    :equals   => /\s*=\s*/,
    :number   => /\s*#/,
    :percent  => /\s*%\s*/,
    :plus     => /\s*\+\s*/,
    # Backslash, ASCII slash, or fullwidth solidus (U+FF0F). The fullwidth
    # character is written as an escape: a bare "/" in that position ends
    # the regexp literal early and is a syntax error.
    :slash    => /\s*(\\|\/|\uFF0F)\s*/,
    :star     => /\s*\*\s*/,
  }

  # Things that just get converted to spaces
  CLEANUP_CHARACTERS = /[\.,:;(){}\[\]\/\?!\^'ʼ"_\|]/
  CLEANUP_HTML_ENTITIES = /&[^;]+;/

  # Currency symbols recognised on their own.
  CURRENCIES_SUPPORTED_SIMPLE = {
    :dollars => /\$/,
    :euros   => /€/,
    :pounds  => /£/,
    :yen     => /¥/,
  }
  # Maps a simple currency key to the key used for its "major.minor" form.
  CURRENCIES_SUPPORTED_COMPLEX = {
    :dollars => :dollars_cents,
    :euros   => :euros_cents,
    :pounds  => :pounds_pence,
  }
  # Matches any supported currency symbol; used as a cheap pre-check.
  CURRENCIES_SUPPORTED = Regexp.new(CURRENCIES_SUPPORTED_SIMPLE.values.join('|'))
  # Symbol followed by optional digits, e.g. "$10"; group 1 is the amount.
  CURRENCIES_SIMPLE = CURRENCIES_SUPPORTED_SIMPLE.inject({}) do |hash, (key, expression)|
    hash[key] = /(?:\s|^)#{expression}(\d*)(?:\s|$)/
    hash
  end
  # Symbol followed by "digits.digits", e.g. "$10.50"; groups 1 and 2 are
  # the major and minor amounts.
  CURRENCIES_COMPLEX = CURRENCIES_SUPPORTED_SIMPLE.inject({}) do |hash, (key, expression)|
    # Do we really need to not worry about complex currencies if there are none for the currency?
    complex_key = CURRENCIES_SUPPORTED_COMPLEX[key]
    hash[complex_key] = /(?:\s|^)#{expression}(\d+)\.(\d+)(?:\s|$)/ if complex_key
    hash
  end
  CURRENCIES = CURRENCIES_SIMPLE.merge(CURRENCIES_COMPLEX)

  # Symbol => pattern matching any of the listed numeric or named forms of
  # one HTML entity, e.g. :amp matches "&#38;" and "&amp;".
  HTML_ENTITIES = {
    :amp          => %w{#38 amp},
    :cent         => %w{#162 cent},
    :copy         => %w{#169 copy},
    :deg          => %w{#176 deg},
    :divide       => %w{#247 divide},
    :double_quote => %w{#34 #822[012] quot ldquo rdquo dbquo},
    :ellipsis     => %w{#8230 hellip},
    :en_dash      => %w{#8211 ndash},
    :em_dash      => %w{#8212 mdash},
    :frac14       => %w{#188 frac14},
    :frac12       => %w{#189 frac12},
    :frac34       => %w{#190 frac34},
    :gt           => %w{#62 gt},
    :lt           => %w{#60 lt},
    :nbsp         => %w{#160 nbsp},
    :pound        => %w{#163 pound},
    :reg          => %w{#174 reg},
    :single_quote => %w{#39 #821[678] apos lsquo rsquo sbquo},
    :times        => %w{#215 times},
    :trade        => %w{#8482 trade},
    :yen          => %w{#165 yen},
  }.inject({}) do |hash, (key, names)|
    hash[key] = /&(#{names.join('|')});/
    hash
  end

  # Matches a single HTML/XML-style tag, including its attributes.
  HTML_TAG = begin
    tag_name  = /[\w:_-]+/
    tag_value = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
    tag_attr  = /(#{tag_name}(\s*=\s*#{tag_value})?)/
    /<[!\/?\[]?(#{tag_name}|--)(\s+(#{tag_attr}(\s+#{tag_attr})*))?\s*([!\/?\]]+|--)?>/
  end

  # Pattern => ASCII replacement for "smart" quotes and the ellipsis character.
  SMART_PUNCTUATION = {
    /(“|”|\302\223|\302\224|\303\222|\303\223)/ => '"',
    /(‘|’|\302\221|\302\222|\303\225)/ => "'",
    /…/ => "...",
  }

  # Ordered by denominator then numerator of the value
  VULGAR_FRACTIONS = {
    :half          => /(&#189;|&frac12;|½)/,
    :one_third     => /(&#8531;|⅓)/,
    :two_thirds    => /(&#8532;|⅔)/,
    :one_fourth    => /(&#188;|&frac14;|¼)/,
    :three_fourths => /(&#190;|&frac34;|¾)/,
    :one_fifth     => /(&#8533;|⅕)/,
    :two_fifths    => /(&#8534;|⅖)/,
    :three_fifths  => /(&#8535;|⅗)/,
    :four_fifths   => /(&#8536;|⅘)/,
    :one_sixth     => /(&#8537;|⅙)/,
    :five_sixths   => /(&#8538;|⅚)/,
    :one_eighth    => /(&#8539;|⅛)/,
    :three_eighths => /(&#8540;|⅜)/,
    :five_eighths  => /(&#8541;|⅝)/,
    :seven_eighths => /(&#8542;|⅞)/,
  }

  WHITESPACE = /\s+/

  class << self
    # Define one reader per constant: ConversionExpressions.html_tag => HTML_TAG, etc.
    %w{
      abbreviation
      accented_html_entity
      apostrophe
      characters
      cleanup_characters
      cleanup_html_entities
      currencies
      currencies_simple
      currencies_complex
      html_entities
      html_tag
      smart_punctuation
      vulgar_fractions
      whitespace
    }.each do |conversion_type|
      define_method conversion_type do
        const_get conversion_type.upcase
      end
    end
  end
end
147
+ end
148
+ end
@@ -0,0 +1,121 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'stringex/localization/conversion_expressions'
4
+
5
+ module Stringex
6
+ module Localization
7
# Works on a private copy of a string and applies Stringex's conversions
# to it in place. The public bang methods perform clean-up steps; the
# protected methods are individual translations that are invoked by name
# through #translate!.
class Converter
  include ConversionExpressions

  attr_reader :ending_whitespace, :options, :starting_whitespace, :string

  # string  - the text to convert; it is duplicated, never mutated.
  # options - overrides merged over
  #           Stringex::Configuration::StringExtensions.default_settings.
  #
  # The leading and trailing whitespace of the original string is recorded
  # so that #smart_strip! can put it back later.
  def initialize(string, options = {})
    @string = string.dup
    @options = Stringex::Configuration::StringExtensions.default_settings.merge(options)
    # Anchor on the whole string (\A / \z). The line anchors ^ and $ would
    # also match whitespace at the edge of an interior line of a multi-line
    # string, which #smart_strip! would then wrongly re-attach.
    @starting_whitespace = string[/\A\s+/]
    @ending_whitespace = string[/\s+\z/]
  end

  # Replaces accented-letter entities with their base letter.
  def cleanup_accented_html_entities!
    string.gsub! expressions.accented_html_entity, '\1'
  end

  # Replaces every character in CLEANUP_CHARACTERS with a space.
  def cleanup_characters!
    string.gsub! expressions.cleanup_characters, ' '
  end

  # Deletes any remaining "&...;" entity.
  def cleanup_html_entities!
    string.gsub! expressions.cleanup_html_entities, ''
  end

  # Replaces smart quotes and the ellipsis character with ASCII equivalents.
  def cleanup_smart_punctuation!
    expressions.smart_punctuation.each do |expression, replacement|
      string.gsub! expression, replacement
    end
  end

  # Strips the working string, then restores the whitespace that surrounded
  # the original input.
  def smart_strip!
    string.strip!
    @string = "#{starting_whitespace}#{string}#{ending_whitespace}"
  end

  # Strips leading and trailing whitespace from the working string.
  def strip!
    string.strip!
  end

  # Deletes everything matched by HTML_TAG.
  def strip_html_tags!
    string.gsub! expressions.html_tag, ''
  end

  # Runs the named conversions, in order. Each name must be one of the
  # protected conversion methods below, e.g.
  #   translate! :ellipses, :currencies
  def translate!(*conversions)
    conversions.each do |conversion|
      send conversion
    end
  end

  protected

  # Removes the dots from dotted abbreviations.
  def abbreviations
    string.gsub! expressions.abbreviation do |x|
      x.gsub '.', ''
    end
  end

  def apostrophes
    string.gsub! expressions.apostrophe, '\1\2'
  end

  # Replaces each symbol in CHARACTERS with its translation, padded with
  # spaces. The :dot translation is used unpadded, and :slash is skipped
  # entirely when options[:allow_slash] is set.
  def characters
    expressions.characters.each do |key, expression|
      next if key == :slash && options[:allow_slash]
      replacement = translate(key)
      replacement = " #{replacement} " unless key == :dot
      string.gsub! expression, replacement
    end
  end

  # Translates currency amounts. Complex ("10.50") forms are handled before
  # simple ones, and nothing is done when no currency symbol is present.
  def currencies
    if has_currencies?
      [:currencies_complex, :currencies_simple].each do |type|
        expressions.send(type).each do |key, expression|
          string.gsub! expression, " #{translate(key, :currencies)} "
        end
      end
    end
  end

  # Translates runs of three or more dots.
  def ellipses
    string.gsub! expressions.characters[:ellipsis], " #{translate(:ellipsis)} "
  end

  # Translates the supported HTML entities, then squeezes repeated spaces.
  def html_entities
    expressions.html_entities.each do |key, expression|
      string.gsub! expression, translate(key, :html_entities)
    end
    string.squeeze! ' '
  end

  # Translates vulgar fractions given as entities or Unicode characters.
  def vulgar_fractions
    expressions.vulgar_fractions.each do |key, expression|
      string.gsub! expression, translate(key, :vulgar_fractions)
    end
  end

  private

  # The module whose lowercase readers supply the patterns.
  def expressions
    ConversionExpressions
  end

  # Truthy when the working string contains any supported currency symbol.
  def has_currencies?
    string =~ CURRENCIES_SUPPORTED
  end

  # Looks up the replacement text for key within scope via Localization.
  def translate(key, scope = :characters)
    Localization.translate scope, key
  end
end
120
+ end
121
+ end
@@ -0,0 +1,88 @@
1
+ # encoding: UTF-8
2
+
3
+ module Stringex
4
+ module Localization
5
# Built-in English replacement strings. The keys mirror those used by the
# conversion expressions; "\1" and "\2" are regexp backreferences that are
# filled in when a replacement is applied with gsub.
# NOTE(review): presumably these are what the localization backend falls
# back to when no translation is configured -- confirm against the backend.
module DefaultConversions
  CHARACTERS = {
    :and      => "and",
    :at       => "at",
    :degrees  => "degrees",
    :divide   => "divided by",
    :dot      => '\1 dot \2',
    :ellipsis => "dot dot dot",
    :equals   => "equals",
    :number   => "number",
    :percent  => "percent",
    :plus     => "plus",
    :slash    => "slash",
    :star     => "star",
  }

  CURRENCIES_SIMPLE = {
    :dollars => '\1 dollars',
    :euros   => '\1 euros',
    :pounds  => '\1 pounds',
    :yen     => '\1 yen',
  }
  CURRENCIES_COMPLEX = {
    :dollars_cents => '\1 dollars \2 cents',
    :euros_cents   => '\1 euros \2 cents',
    :pounds_pence  => '\1 pounds \2 pence',
  }
  CURRENCIES = CURRENCIES_SIMPLE.merge(CURRENCIES_COMPLEX)

  HTML_ENTITIES = {
    :amp          => "and",
    :cent         => " cents",
    :copy         => "(c)",
    :deg          => " degrees ",
    :divide       => " divided by ",
    :double_quote => '"',
    :ellipsis     => "...",
    :en_dash      => "-",
    :em_dash      => "--",
    :frac14       => "one fourth",
    :frac12       => "half",
    :frac34       => "three fourths",
    :gt           => ">",
    :lt           => "<",
    :nbsp         => " ",
    :pound        => " pounds ",
    :reg          => "(r)",
    :single_quote => "'",
    :times        => "x",
    :trade        => "(tm)",
    :yen          => " yen "
  }

  TRANSLITERATIONS = {}

  # Ordered by denominator then numerator of the value
  VULGAR_FRACTIONS = {
    :half          => "half",
    :one_third     => "one third",
    :two_thirds    => "two thirds",
    :one_fourth    => "one fourth",
    :three_fourths => "three fourths",
    :one_fifth     => "one fifth",
    :two_fifths    => "two fifths",
    :three_fifths  => "three fifths",
    :four_fifths   => "four fifths",
    :one_sixth     => "one sixth",
    :five_sixths   => "five sixths",
    :one_eighth    => "one eighth",
    :three_eighths => "three eighths",
    :five_eighths  => "five eighths",
    :seven_eighths => "seven eighths",
  }

  # Module-level readers, one per conversion table.

  def self.characters
    const_get "CHARACTERS"
  end

  def self.currencies
    const_get "CURRENCIES"
  end

  def self.html_entities
    const_get "HTML_ENTITIES"
  end

  def self.transliterations
    const_get "TRANSLITERATIONS"
  end

  def self.vulgar_fractions
    const_get "VULGAR_FRACTIONS"
  end
end
87
+ end
88
+ end
@@ -0,0 +1,10 @@
1
+ module Stringex
2
# Rails integration: registers Stringex's bundled locale files with I18n
# when the application boots.
class Railtie < ::Rails::Railtie #:nodoc:
  initializer "stringex" do |app|
    available = app.config.i18n.available_locales
    # With no configured locales load every bundled file; otherwise only
    # the files named after the configured locales.
    glob = if available.blank?
      "*"
    else
      "{#{available.join(',')}}"
    end
    locale_files = Dir[File.join(Stringex::Localization::Backend::I18n::LOAD_PATH_BASE, "#{glob}.yml")]
    I18n.load_path.concat(locale_files)
  end
end
10
+ end
@@ -1,236 +1,181 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  module Stringex
4
- # These methods are all added on String class.
5
4
  module StringExtensions
6
- def self.included(base) # :nodoc:
7
- base.extend(ClassMethods)
8
- end
5
+ # These methods are all included into the String class.
6
+ module PublicInstanceMethods
7
# Removes the given character from the beginning and end of the string and
# then performs <tt>String#squeeze(character)</tt>, condensing runs of the
# character within the string.
#
# The character is matched literally, so regexp metacharacters such as "."
# or "*" are safe to pass:
#
#   "  foo   bar ".collapse      # => "foo bar"
#   "..foo..bar..".collapse(".") # => "foo.bar"
#
# Note: This method has been superseded by ActiveSupport's squish method.
def collapse(character = " ")
  # Escape before interpolating; an unescaped "." would match (and strip)
  # any character at all.
  run = "(?:#{Regexp.escape(character)})*"
  # \A / \Z anchor on the string itself rather than on the first line.
  sub(/\A#{run}/, "").sub(/#{run}\Z/, "").squeeze(character)
end
9
14
 
10
- # Returns the string converted (via Textile/RedCloth) to HTML format
11
- # or self [with a friendly warning] if Redcloth is not available.
12
- #
13
- # Using <tt>:lite</tt> argument will cause RedCloth to not wrap the HTML in a container
14
- # P element, which is useful behavior for generating header element text, etc.
15
- # This is roughly equivalent to ActionView's <tt>textilize_without_paragraph</tt>
16
- # except that it makes RedCloth do all the work instead of just gsubbing the return
17
- # from RedCloth.
18
- def to_html(lite_mode = false)
19
- if defined?(RedCloth)
20
- if lite_mode
21
- RedCloth.new(self, [:lite_mode]).to_html
22
- else
23
- if self =~ /<pre>/
24
- RedCloth.new(self).to_html.tr("\t", "")
25
- else
26
- RedCloth.new(self).to_html.tr("\t", "").gsub(/\n\n/, "")
27
- end
15
+ # Converts HTML entities into the respective non-accented letters. Examples:
16
+ #
17
+ # "&aacute;".convert_accented_html_entities # => "a"
18
+ # "&ccedil;".convert_accented_html_entities # => "c"
19
+ # "&egrave;".convert_accented_html_entities # => "e"
20
+ # "&icirc;".convert_accented_html_entities # => "i"
21
+ # "&oslash;".convert_accented_html_entities # => "o"
22
+ # "&uuml;".convert_accented_html_entities # => "u"
23
+ #
24
+ # Note: This does not do any conversion of Unicode/ASCII accented-characters. For that
25
+ # functionality please use <tt>to_ascii</tt>.
26
+ def convert_accented_html_entities
27
+ stringex_convert do
28
+ cleanup_accented_html_entities!
28
29
  end
29
- else
30
- warn "String#to_html was called without RedCloth being successfully required"
31
- self
32
30
  end
33
- end
34
31
 
35
- # Create a URI-friendly representation of the string. This is used internally by
36
- # acts_as_url[link:classes/Stringex/ActsAsUrl/ClassMethods.html#M000012]
37
- # but can be called manually in order to generate an URI-friendly version of any string.
38
- def to_url(options = {})
39
- return self if options[:exclude] && options[:exclude].include?(self)
40
- remove_formatting(options).downcase.replace_whitespace("-").collapse("-").limit(options[:limit])
41
- end
32
# Converts various common plaintext characters to a more URI-friendly representation.
# Examples (illustrative, carried over from the original documentation):
#
#   "foo & bar".convert_miscellaneous_characters         # => "foo and bar"
#   "user@host".convert_miscellaneous_characters         # => "user at host"
#   "google.com".convert_miscellaneous_characters        # => "google dot com"
#   "$10".convert_miscellaneous_characters               # => "10 dollars"
#   "*69".convert_miscellaneous_characters               # => "star 69"
#   "100%".convert_miscellaneous_characters              # => "100 percent"
#   "windows/mac/linux".convert_miscellaneous_characters # => "windows slash mac slash linux"
#
# It allows localization of conversions so you can use it to convert characters into your own language.
# Example:
#
#   I18n.backend.store_translations :de, { :stringex => { :characters => { :and => "und" } } }
#   I18n.locale = :de
#   "ich & dich".convert_miscellaneous_characters # => "ich und dich"
#
# options is handed on to the conversion, so settings such as
# <tt>:allow_slash</tt> take effect.
#
# Note: Because this method will convert any & symbols to the string "and",
# you should run any methods which convert HTML entities (convert_accented_html_entities and convert_miscellaneous_html_entities)
# before running this method.
def convert_miscellaneous_characters(options = {})
  # Pass the options through: without them the converter only ever sees the
  # default settings and a caller's :allow_slash is silently ignored.
  stringex_convert(options) do
    translate! :ellipses, :currencies, :abbreviations, :characters, :apostrophes
    cleanup_characters!
  end
end
42
60
 
43
- # Returns the string limited in size to the value of limit.
44
- def limit(limit = nil)
45
- limit.nil? ? self : self[0...limit]
46
- end
61
# Converts HTML entities (the ones common Textile/RedCloth formatting
# produces) into plain text, then drops any entity that is left over.
#
# Note: This is not a complete HTML entity converter; it only targets the
# entities Textile is most likely to generate.
def convert_miscellaneous_html_entities
  stringex_convert do
    translate!(:html_entities)
    cleanup_html_entities!
  end
end
47
71
 
48
- # Performs multiple text manipulations. Essentially a shortcut for typing them all. View source
49
- # below to see which methods are run.
50
- def remove_formatting(options = {})
51
- strip_html_tags.
52
- convert_smart_punctuation.
53
- convert_accented_entities.
54
- convert_vulgar_fractions.
55
- convert_misc_entities.
56
- convert_misc_characters(options).
57
- to_ascii.
58
- # NOTE: String#to_ascii may convert some Unicode characters to ascii we'd already transliterated
59
- # so we need to do it again just to be safe
60
- convert_misc_characters(options).
61
- collapse
62
- end
72
# Converts MS Word style "smart punctuation" (curly quotes and the ellipsis
# character) to plain ASCII.
def convert_smart_punctuation
  stringex_convert { cleanup_smart_punctuation! }
end
63
79
 
64
- # Removes HTML tags from text. This code is simplified from Tobias Luettke's regular expression
65
- # in Typo[http://typosphere.org].
66
- def strip_html_tags(leave_whitespace = false)
67
- name = /[\w:_-]+/
68
- value = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
69
- attr = /(#{name}(\s*=\s*#{value})?)/
70
- rx = /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
71
- (leave_whitespace) ? gsub(rx, "").strip : gsub(rx, "").gsub(/\s+/, " ").strip
72
- end
73
- # Converts HTML entities into the respective non-accented letters. Examples:
74
- #
75
- # "&aacute;".convert_accented_entities # => "a"
76
- # "&ccedil;".convert_accented_entities # => "c"
77
- # "&egrave;".convert_accented_entities # => "e"
78
- # "&icirc;".convert_accented_entities # => "i"
79
- # "&oslash;".convert_accented_entities # => "o"
80
- # "&uuml;".convert_accented_entities # => "u"
81
- #
82
- # Note: This does not do any conversion of Unicode/ASCII accented-characters. For that
83
- # functionality please use <tt>to_ascii</tt>.
84
- def convert_accented_entities
85
- gsub(/&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/, '\1').strip
86
- end
80
# Converts vulgar fractions, whether written as supported HTML entities or
# as Unicode characters, to plain text.
def convert_vulgar_fractions
  stringex_convert { translate!(:vulgar_fractions) }
end
87
86
 
88
- # Converts HTML entities (taken from common Textile/RedCloth formattings) into plain text formats.
89
- #
90
- # Note: This isn't an attempt at complete conversion of HTML entities, just those most likely
91
- # to be generated by Textile.
92
- def convert_misc_entities
93
- dummy = dup
94
- {
95
- "#822[01]" => "\"",
96
- "#821[67]" => "'",
97
- "#8230" => "...",
98
- "#8211" => "-",
99
- "#8212" => "--",
100
- "#215" => "x",
101
- "gt" => ">",
102
- "lt" => "<",
103
- "(#8482|trade)" => "(tm)",
104
- "(#174|reg)" => "(r)",
105
- "(#169|copy)" => "(c)",
106
- "(#38|amp)" => "and",
107
- "nbsp" => " ",
108
- "(#162|cent)" => " cent",
109
- "(#163|pound)" => " pound",
110
- "(#188|frac14)" => "one fourth",
111
- "(#189|frac12)" => "half",
112
- "(#190|frac34)" => "three fourths",
113
- "(#247|divide)" => "divide",
114
- "(#176|deg)" => " degrees "
115
- }.each do |textiled, normal|
116
- dummy.gsub!(/&#{textiled};/, normal)
87
# Returns the first +limit+ characters of the string, or the string itself
# when no limit is given.
def limit(limit = nil)
  return self if limit.nil?

  self[0...limit]
end
118
- dummy.gsub(/&[^;]+;/, "").strip
119
- end
120
91
 
121
- # Converts vulgar fractions from supported html entities and unicode to
122
- # plain text formats.
123
- def convert_vulgar_fractions
124
- dummy = dup
125
- {
126
- "(&#188;|&frac14;|¼)" => "one fourth",
127
- "(&#189;|&frac12;|½)" => "half",
128
- "(&#190;|&frac34;|¾)" => "three fourths",
129
- "(&#8531;|⅓)" => "one third",
130
- "(&#8532;|⅔)" => "two thirds",
131
- "(&#8533;|⅕)" => "one fifth",
132
- "(&#8534;|⅖)" => "two fifths",
133
- "(&#8535;|⅗)" => "three fifths",
134
- "(&#8536;|⅘)" => "four fifths",
135
- "(&#8537;|⅙)" => "one sixth",
136
- "(&#8538;|⅚)" => "five sixths",
137
- "(&#8539;|⅛)" => "one eighth",
138
- "(&#8540;|⅜)" => "three eighths",
139
- "(&#8541;|⅝)" => "five eighths",
140
- "(&#8542;|⅞)" => "seven eighths"
141
- }.each do |textiled, normal|
142
- dummy.gsub!(/#{textiled}/, normal)
92
# Runs the full clean-up pipeline on the string: HTML tags, smart
# punctuation, accented entities, vulgar fractions, remaining HTML entities
# and miscellaneous characters are converted, the result is transliterated
# to ASCII and finally collapsed. options is passed to both
# convert_miscellaneous_characters calls.
def remove_formatting(options = {})
  text = strip_html_tags
  text = text.convert_smart_punctuation
  text = text.convert_accented_html_entities
  text = text.convert_vulgar_fractions
  text = text.convert_miscellaneous_html_entities
  text = text.convert_miscellaneous_characters(options)
  text = text.to_ascii
  # NOTE: String#to_ascii may convert some Unicode characters to ascii we'd already transliterated
  # so we need to do it again just to be safe
  text = text.convert_miscellaneous_characters(options)
  text.collapse
end
144
- dummy
145
- end
146
107
 
147
- # Converts MS Word 'smart punctuation' to ASCII
148
- #
149
- def convert_smart_punctuation
150
- dummy = dup
151
- {
152
-
153
- "(“|”|\302\223|\302\224|\303\222|\303\223)" => '"',
154
- "(‘|’|\302\221|\302\222|\303\225)" => "'",
155
- "…" => "...",
156
- }.each do |smart, normal|
157
- dummy.gsub!(/#{smart}/, normal)
108
# Replaces every run of whitespace in the string. The replacement defaults
# to a single space but any string may be given. Examples:
#
#   "Foo       bar".replace_whitespace      # => "Foo bar"
#   "Foo       bar".replace_whitespace("-") # => "Foo-bar"
def replace_whitespace(replacement = " ")
  whitespace_run = /\s+/
  gsub(whitespace_run, replacement)
end
159
- dummy.strip
160
- end
161
116
 
162
- # Converts various common plaintext characters to a more URI-friendly representation.
163
- # Examples:
164
- #
165
- # "foo & bar".convert_misc_characters # => "foo and bar"
166
- # "Chanel #9".convert_misc_characters # => "Chanel number nine"
167
- # "user@host".convert_misc_characters # => "user at host"
168
- # "google.com".convert_misc_characters # => "google dot com"
169
- # "$10".convert_misc_characters # => "10 dollars"
170
- # "*69".convert_misc_characters # => "star 69"
171
- # "100%".convert_misc_characters # => "100 percent"
172
- # "windows/mac/linux".convert_misc_characters # => "windows slash mac slash linux"
173
- #
174
- # Note: Because this method will convert any & symbols to the string "and",
175
- # you should run any methods which convert HTML entities (convert_html_entities and convert_misc_entities)
176
- # before running this method.
177
- def convert_misc_characters(options = {})
178
- dummy = dup.gsub(/\.{3,}/, " dot dot dot ") # Catch ellipses before single dot rule!
179
- # Special rules for money
180
- {
181
- /(\s|^)\$(\d+)\.(\d+)(\s|$)/ => '\2 dollars \3 cents',
182
- /(\s|^)£(\d+)\.(\d+)(\s|$)/u => '\2 pounds \3 pence',
183
- }.each do |found, replaced|
184
- replaced = " #{replaced} " unless replaced =~ /\\1/
185
- dummy.gsub!(found, replaced)
117
# Removes HTML tags from the text. Unless leave_whitespace is true, each
# run of whitespace in the result is also replaced by a single space.
# NOTE: This code is simplified from Tobias Luettke's regular expression in Typo[http://typosphere.org].
def strip_html_tags(leave_whitespace = false)
  stripped = stringex_convert { strip_html_tags! }
  return stripped if leave_whitespace

  stripped.replace_whitespace(' ')
end
187
- # Special rules for abbreviations
188
- dummy.gsub!(/(\s|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|$))/) do |x|
189
- x.gsub(".", "")
125
+
126
# Returns the string converted (via Textile/RedCloth) to HTML format.
# When RedCloth is not loaded a warning is emitted and self is returned.
#
# Passing a true lite_mode makes RedCloth run with <tt>:lite_mode</tt>, so
# the HTML is not wrapped in a container P element, which is useful for
# header element text and the like. This is roughly equivalent to
# ActionView's <tt>textilize_without_paragraph</tt> except that RedCloth
# does all the work.
#
# In normal mode tabs are removed from the output, and blank lines are
# removed too unless the source contains a <pre> tag.
def to_html(lite_mode = false)
  unless defined?(RedCloth)
    warn "String#to_html was called without RedCloth being successfully required"
    return self
  end
  return RedCloth.new(self, [:lite_mode]).to_html if lite_mode

  preformatted = self =~ /<pre>/
  html = RedCloth.new(self).to_html.tr("\t", "")
  preformatted ? html : html.gsub(/\n\n/, "")
end
191
- # Back to normal rules
192
- misc_characters =
193
- {
194
- /\s*&\s*/ => "and",
195
- /\s*#/ => "number",
196
- /\s*@\s*/ => "at",
197
- /(\S|^)\.(\S)/ => '\1 dot \2',
198
- /(\s|^)\$(\d*)(\s|$)/ => '\2 dollars',
199
- /(\s|^)£(\d*)(\s|$)/u => '\2 pounds',
200
- /(\s|^)¥(\d*)(\s|$)/u => '\2 yen',
201
- /\s*\*\s*/ => "star",
202
- /\s*%\s*/ => "percent",
203
- /(\s*=\s*)/ => " equals ",
204
- /\s*\+\s*/ => "plus",
205
- /\s*÷\s*/ => "divide",
206
- /\s*°\s*/ => "degrees"
207
- }
208
- misc_characters[/\s*(\\|\/|/)\s*/] = 'slash' unless options[:allow_slash]
209
- misc_characters.each do |found, replaced|
210
- replaced = " #{replaced} " unless replaced =~ /\\1/
211
- dummy.gsub!(found, replaced)
150
+
151
# Create a URI-friendly representation of the string. This is used internally by
# acts_as_url[link:classes/Stringex/ActsAsUrl/ClassMethods.html#M000012]
# but can be called manually in order to generate an URI-friendly version of any string.
#
# Options (merged over the configured defaults):
#
# <tt>:exclude</tt>:: collection of strings to return untouched
# <tt>:replace_whitespace_with</tt>:: token substituted for whitespace
# <tt>:limit</tt>:: maximum length of the result
# <tt>:force_downcase</tt>:: pass false to keep the original case
def to_url(options = {})
  return self if options[:exclude] && options[:exclude].include?(self)
  options = stringex_default_options.merge(options)
  whitespace_replacement_token = options[:replace_whitespace_with]
  # Collapse on the token that was actually substituted for whitespace; a
  # hard-coded "-" leaves leading, trailing and repeated tokens in place
  # whenever a different replacement is configured.
  dummy = remove_formatting(options).
            replace_whitespace(whitespace_replacement_token).
            collapse(whitespace_replacement_token).
            limit(options[:limit])
  dummy.downcase! unless options[:force_downcase] == false
  dummy
end
213
- dummy = dummy.gsub(/(^|[[:alpha:]])'|`([[:alpha:]]|$)/, '\1\2').gsub(/[\.,:;()\[\]\/\?!\^'ʼ"_\|]/, " ").strip
214
- end
215
165
 
216
- # Replace runs of whitespace in string. Defaults to a single space but any replacement
217
- # string may be specified as an argument. Examples:
218
- #
219
- # "Foo bar".replace_whitespace # => "Foo bar"
220
- # "Foo bar".replace_whitespace("-") # => "Foo-bar"
221
- def replace_whitespace(replace = " ")
222
- gsub(/\s+/, replace)
223
- end
166
+ private
167
+
168
# Hands this string, the given options and the block to
# Localization.convert and returns whatever that call returns.
def stringex_convert(options = {}, &block)
  Localization.convert(self, options, &block)
end
224
171
 
225
- # Removes specified character from the beginning and/or end of the string and then performs
226
- # <tt>String#squeeze(character)</tt>, condensing runs of the character within the string.
227
- #
228
- # Note: This method has been superceded by ActiveSupport's squish method.
229
- def collapse(character = " ")
230
- sub(/^#{character}*/, "").sub(/#{character}*$/, "").squeeze(character)
172
# The default settings configured for the string extensions.
def stringex_default_options
  configuration = Stringex::Configuration::StringExtensions
  configuration.default_settings
end
231
175
  end
232
176
 
233
- module ClassMethods
177
+ # These methods are extended onto the String class itself.
178
+ module PublicClassMethods
234
179
  # Returns string of random characters with a length matching the specified limit. Excludes 0
235
180
  # to avoid confusion between 0 and O.
236
181
  def random(limit)