stringex 1.5.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +15 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +74 -0
  4. data/README.rdoc +22 -1
  5. data/Rakefile +46 -223
  6. data/VERSION +1 -0
  7. data/init.rb +1 -0
  8. data/lib/stringex.rb +11 -3
  9. data/lib/stringex/acts_as_url.rb +49 -97
  10. data/lib/stringex/acts_as_url/adapter.rb +26 -0
  11. data/lib/stringex/acts_as_url/adapter/active_record.rb +23 -0
  12. data/lib/stringex/acts_as_url/adapter/base.rb +188 -0
  13. data/lib/stringex/acts_as_url/adapter/data_mapper.rb +67 -0
  14. data/lib/stringex/acts_as_url/adapter/mongoid.rb +36 -0
  15. data/lib/stringex/configuration.rb +4 -0
  16. data/lib/stringex/configuration/acts_as_url.rb +44 -0
  17. data/lib/stringex/configuration/base.rb +58 -0
  18. data/lib/stringex/configuration/configurator.rb +25 -0
  19. data/lib/stringex/configuration/string_extensions.rb +19 -0
  20. data/lib/stringex/localization.rb +98 -0
  21. data/lib/stringex/localization/backend/i18n.rb +53 -0
  22. data/lib/stringex/localization/backend/internal.rb +51 -0
  23. data/lib/stringex/localization/conversion_expressions.rb +148 -0
  24. data/lib/stringex/localization/converter.rb +121 -0
  25. data/lib/stringex/localization/default_conversions.rb +88 -0
  26. data/lib/stringex/rails/railtie.rb +10 -0
  27. data/lib/stringex/string_extensions.rb +153 -208
  28. data/lib/stringex/unidecoder.rb +6 -101
  29. data/lib/stringex/unidecoder_data/x00.yml +1 -1
  30. data/lib/stringex/unidecoder_data/x02.yml +5 -5
  31. data/lib/stringex/unidecoder_data/x05.yml +1 -1
  32. data/lib/stringex/unidecoder_data/x06.yml +1 -1
  33. data/lib/stringex/unidecoder_data/x07.yml +3 -3
  34. data/lib/stringex/unidecoder_data/x09.yml +1 -1
  35. data/lib/stringex/unidecoder_data/x0e.yml +2 -2
  36. data/lib/stringex/unidecoder_data/x1f.yml +2 -2
  37. data/lib/stringex/unidecoder_data/x20.yml +1 -1
  38. data/lib/stringex/unidecoder_data/xfb.yml +1 -1
  39. data/lib/stringex/unidecoder_data/xff.yml +1 -1
  40. data/lib/stringex/version.rb +8 -0
  41. data/locales/da.yml +73 -0
  42. data/locales/en.yml +66 -0
  43. data/stringex.gemspec +77 -18
  44. data/test/acts_as_url/adapter/active_record.rb +72 -0
  45. data/test/acts_as_url/adapter/data_mapper.rb +82 -0
  46. data/test/acts_as_url/adapter/mongoid.rb +73 -0
  47. data/test/acts_as_url_configuration_test.rb +51 -0
  48. data/test/acts_as_url_integration_test.rb +271 -0
  49. data/test/localization/da_test.rb +117 -0
  50. data/test/localization/default_test.rb +113 -0
  51. data/test/localization/en_test.rb +117 -0
  52. data/test/localization_test.rb +123 -0
  53. data/test/redcloth_to_html_test.rb +37 -0
  54. data/test/string_extensions_test.rb +59 -91
  55. data/test/test_helper.rb +2 -0
  56. data/test/unicode_point_suite/basic_greek_test.rb +113 -0
  57. data/test/unicode_point_suite/basic_latin_test.rb +142 -0
  58. data/test/unicode_point_suite/codepoint_test_helper.rb +32 -0
  59. data/test/unidecoder/bad_localization.yml +1 -0
  60. data/test/unidecoder/localization.yml +4 -0
  61. data/test/unidecoder_test.rb +3 -5
  62. metadata +145 -37
  63. data/test/acts_as_url_test.rb +0 -272
@@ -0,0 +1,148 @@
1
+ # encoding: UTF-8
2
+
3
module Stringex
  module Localization
    # Regular expressions that locate the text fragments rewritten during
    # conversion (punctuation, currencies, HTML entities, HTML tags, vulgar
    # fractions, ...). Every constant is also exposed through a lower-case
    # module-level reader, e.g. <tt>ConversionExpressions.characters</tt>.
    module ConversionExpressions
      ABBREVIATION = /(\s|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|$))/

      ACCENTED_HTML_ENTITY = /&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/

      APOSTROPHE = /(^|[[:alpha:]])'|`([[:alpha:]]|$)/

      # Keyed by translation key. Hash order is the order in which a caller
      # iterating this table will apply the expressions.
      CHARACTERS = {
        :and      => /\s*&\s*/,
        :at       => /\s*@\s*/,
        :degrees  => /\s*°\s*/,
        :divide   => /\s*÷\s*/,
        :dot      => /(\S|^)\.(\S)/,
        :ellipsis => /\s*\.{3,}\s*/,
        :equals   => /\s*=\s*/,
        :number   => /\s*#/,
        :percent  => /\s*%\s*/,
        :plus     => /\s*\+\s*/,
        # Backslash, ASCII slash or U+FF0F FULLWIDTH SOLIDUS. The last
        # alternative must not be a bare ASCII "/", which would terminate the
        # regexp literal early.
        :slash    => /\s*(\\|\/|／)\s*/,
        :star     => /\s*\*\s*/,
      }

      # Things that just get converted to spaces
      CLEANUP_CHARACTERS = /[\.,:;(){}\[\]\/\?!\^'ʼ"_\|]/
      CLEANUP_HTML_ENTITIES = /&[^;]+;/

      # Currency symbols recognised on their own ("$10").
      CURRENCIES_SUPPORTED_SIMPLE = {
        :dollars => /\$/,
        :euros   => /€/,
        :pounds  => /£/,
        :yen     => /¥/,
      }
      # Maps a simple currency key to the key used for its "major.minor" form.
      # Currencies without an entry here get no complex expression.
      CURRENCIES_SUPPORTED_COMPLEX = {
        :dollars => :dollars_cents,
        :euros   => :euros_cents,
        :pounds  => :pounds_pence,
      }
      # Matches any supported currency symbol.
      CURRENCIES_SUPPORTED = Regexp.new(CURRENCIES_SUPPORTED_SIMPLE.values.join('|'))
      # symbol + optional digits, delimited by whitespace or line boundaries.
      CURRENCIES_SIMPLE = CURRENCIES_SUPPORTED_SIMPLE.each_with_object({}) do |(key, expression), hash|
        hash[key] = /(?:\s|^)#{expression}(\d*)(?:\s|$)/
      end
      # symbol + digits "." digits, only for currencies listed in
      # CURRENCIES_SUPPORTED_COMPLEX.
      CURRENCIES_COMPLEX = CURRENCIES_SUPPORTED_SIMPLE.each_with_object({}) do |(key, expression), hash|
        complex_key = CURRENCIES_SUPPORTED_COMPLEX[key]
        hash[complex_key] = /(?:\s|^)#{expression}(\d+)\.(\d+)(?:\s|$)/ if complex_key
      end
      CURRENCIES = CURRENCIES_SIMPLE.merge(CURRENCIES_COMPLEX)

      # Built inside a proc so the intermediate table does not leak into the
      # module scope. Each value lists the numeric and named spellings of one
      # entity; they are joined into a single /&(...);/ alternation.
      HTML_ENTITIES = Proc.new {
        base = {
          :amp          => %w{#38 amp},
          :cent         => %w{#162 cent},
          :copy         => %w{#169 copy},
          :deg          => %w{#176 deg},
          :divide       => %w{#247 divide},
          :double_quote => %w{#34 #822[012] quot ldquo rdquo dbquo},
          :ellipsis     => %w{#8230 hellip},
          :en_dash      => %w{#8211 ndash},
          :em_dash      => %w{#8212 mdash},
          :frac14       => %w{#188 frac14},
          :frac12       => %w{#189 frac12},
          :frac34       => %w{#190 frac34},
          :gt           => %w{#62 gt},
          :lt           => %w{#60 lt},
          :nbsp         => %w{#160 nbsp},
          :pound        => %w{#163 pound},
          :reg          => %w{#174 reg},
          :single_quote => %w{#39 #821[678] apos lsquo rsquo sbquo},
          :times        => %w{#215 times},
          :trade        => %w{#8482 trade},
          :yen          => %w{#165 yen},
        }
        base.each_with_object({}) do |(key, spellings), hash|
          hash[key] = /&(#{spellings.join('|')});/
        end
      }.call

      # Matches a single HTML/XML tag, comment delimiter or processing
      # instruction, including its attributes.
      HTML_TAG = Proc.new {
        name = /[\w:_-]+/
        value = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
        attr = /(#{name}(\s*=\s*#{value})?)/
        /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
      }.call

      # Expression => ASCII replacement.
      SMART_PUNCTUATION = {
        /(“|”|\302\223|\302\224|\303\222|\303\223)/ => '"',
        /(‘|’|\302\221|\302\222|\303\225)/ => "'",
        /…/ => "...",
      }

      # Ordered by denominator then numerator of the value
      VULGAR_FRACTIONS = {
        :half          => /(&#189;|&frac12;|½)/,
        :one_third     => /(&#8531;|⅓)/,
        :two_thirds    => /(&#8532;|⅔)/,
        :one_fourth    => /(&#188;|&frac14;|¼)/,
        :three_fourths => /(&#190;|&frac34;|¾)/,
        :one_fifth     => /(&#8533;|⅕)/,
        :two_fifths    => /(&#8534;|⅖)/,
        :three_fifths  => /(&#8535;|⅗)/,
        :four_fifths   => /(&#8536;|⅘)/,
        :one_sixth     => /(&#8537;|⅙)/,
        :five_sixths   => /(&#8538;|⅚)/,
        :one_eighth    => /(&#8539;|⅛)/,
        :three_eighths => /(&#8540;|⅜)/,
        :five_eighths  => /(&#8541;|⅝)/,
        :seven_eighths => /(&#8542;|⅞)/,
      }

      WHITESPACE = /\s+/

      class << self
        # Define one module-level reader per constant above: the reader
        # +foo_bar+ returns the constant +FOO_BAR+.
        %w{
          abbreviation
          accented_html_entity
          apostrophe
          characters
          cleanup_characters
          cleanup_html_entities
          currencies
          currencies_simple
          currencies_complex
          html_entities
          html_tag
          smart_punctuation
          vulgar_fractions
          whitespace
        }.each do |conversion_type|
          define_method conversion_type do
            const_get conversion_type.upcase
          end
        end
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'stringex/localization/conversion_expressions'
4
+
5
module Stringex
  module Localization
    # Works on a private copy of a string and rewrites it in place using the
    # patterns from ConversionExpressions and the replacement text returned by
    # Localization.translate.
    class Converter
      include ConversionExpressions

      attr_reader :ending_whitespace, :options, :starting_whitespace, :string

      # string::  the text to convert; it is duplicated, never mutated.
      # options:: merged over Stringex::Configuration::StringExtensions.default_settings.
      #
      # The whitespace at the very start and very end of the string is
      # remembered so that smart_strip! can put it back. \A and \z are used
      # rather than ^ and $ so that, for multi-line input, whitespace around an
      # interior line break is not mistaken for leading/trailing whitespace.
      def initialize(string, options = {})
        @string = string.dup
        @options = Stringex::Configuration::StringExtensions.default_settings.merge(options)
        @starting_whitespace = string[/\A\s+/]
        @ending_whitespace = string[/\s+\z/]
      end

      # Replaces accented-letter entities (e.g. &aacute;) with the bare letter.
      def cleanup_accented_html_entities!
        string.gsub! expressions.accented_html_entity, '\1'
      end

      # Turns each cleanup character into a single space.
      def cleanup_characters!
        string.gsub! expressions.cleanup_characters, ' '
      end

      # Deletes anything that still looks like an HTML entity.
      def cleanup_html_entities!
        string.gsub! expressions.cleanup_html_entities, ''
      end

      # Replaces each smart-punctuation match with its ASCII replacement.
      def cleanup_smart_punctuation!
        expressions.smart_punctuation.each do |expression, replacement|
          string.gsub! expression, replacement
        end
      end

      # Strips the working string, then restores the whitespace that the
      # original input started and ended with.
      def smart_strip!
        string.strip!
        @string = "#{starting_whitespace}#{string}#{ending_whitespace}"
      end

      def strip!
        string.strip!
      end

      # Removes everything matched by the HTML tag expression.
      def strip_html_tags!
        string.gsub! expressions.html_tag, ''
      end

      # Runs the named conversions (the protected methods below) in order.
      def translate!(*conversions)
        conversions.each do |conversion|
          send conversion
        end
      end

      protected

      # Drops the dots from dotted abbreviations.
      def abbreviations
        string.gsub! expressions.abbreviation do |x|
          x.gsub '.', ''
        end
      end

      def apostrophes
        string.gsub! expressions.apostrophe, '\1\2'
      end

      # Applies every CHARACTERS expression in table order. The slash rule is
      # skipped when options[:allow_slash] is set. Every replacement except
      # :dot is padded with spaces (the :dot translation carries its own
      # back-references and spacing).
      def characters
        expressions.characters.each do |key, expression|
          next if key == :slash && options[:allow_slash]
          replacement = translate(key)
          replacement = " #{replacement} " unless key == :dot
          string.gsub! expression, replacement
        end
      end

      # Complex ("major.minor") amounts are handled before simple ones so the
      # simple expressions never see the integer part of a complex amount.
      def currencies
        return unless has_currencies?
        [:currencies_complex, :currencies_simple].each do |type|
          expressions.send(type).each do |key, expression|
            string.gsub! expression, " #{translate(key, :currencies)} "
          end
        end
      end

      def ellipses
        string.gsub! expressions.characters[:ellipsis], " #{translate(:ellipsis)} "
      end

      # Replaces known entities with their translations, then squeezes runs of
      # spaces.
      def html_entities
        expressions.html_entities.each do |key, expression|
          string.gsub! expression, translate(key, :html_entities)
        end
        string.squeeze! ' '
      end

      def vulgar_fractions
        expressions.vulgar_fractions.each do |key, expression|
          string.gsub! expression, translate(key, :vulgar_fractions)
        end
      end

      private

      # The module whose readers supply the expressions.
      def expressions
        ConversionExpressions
      end

      # Cheap pre-check: does the string contain any supported currency symbol?
      def has_currencies?
        string =~ CURRENCIES_SUPPORTED
      end

      # Looks up the replacement text for +key+ within +scope+.
      def translate(key, scope = :characters)
        Localization.translate scope, key
      end
    end
  end
end
@@ -0,0 +1,88 @@
1
+ # encoding: UTF-8
2
+
3
module Stringex
  module Localization
    # Built-in English replacement strings, grouped by conversion type. Each
    # constant is also available through a lower-case module-level reader,
    # e.g. <tt>DefaultConversions.characters</tt>.
    #
    # Values containing \1 / \2 are gsub replacement templates; the
    # back-references refer to capture groups of the expression they are
    # paired with.
    module DefaultConversions
      CHARACTERS = {
        :and      => "and",
        :at       => "at",
        :degrees  => "degrees",
        :divide   => "divided by",
        :dot      => '\1 dot \2',
        :ellipsis => "dot dot dot",
        :equals   => "equals",
        :number   => "number",
        :percent  => "percent",
        :plus     => "plus",
        :slash    => "slash",
        :star     => "star",
      }

      CURRENCIES_SIMPLE = {
        :dollars => '\1 dollars',
        :euros   => '\1 euros',
        :pounds  => '\1 pounds',
        :yen     => '\1 yen',
      }
      CURRENCIES_COMPLEX = {
        :dollars_cents => '\1 dollars \2 cents',
        :euros_cents   => '\1 euros \2 cents',
        :pounds_pence  => '\1 pounds \2 pence',
      }
      CURRENCIES = CURRENCIES_SIMPLE.merge(CURRENCIES_COMPLEX)

      HTML_ENTITIES = {
        :amp          => "and",
        :cent         => " cents",
        :copy         => "(c)",
        :deg          => " degrees ",
        :divide       => " divided by ",
        :double_quote => '"',
        :ellipsis     => "...",
        :en_dash      => "-",
        :em_dash      => "--",
        :frac14       => "one fourth",
        :frac12       => "half",
        :frac34       => "three fourths",
        :gt           => ">",
        :lt           => "<",
        :nbsp         => " ",
        :pound        => " pounds ",
        :reg          => "(r)",
        :single_quote => "'",
        :times        => "x",
        :trade        => "(tm)",
        :yen          => " yen ",
      }

      # No transliterations are defined by default.
      TRANSLITERATIONS = {}

      # Ordered by denominator then numerator of the value
      VULGAR_FRACTIONS = {
        :half          => "half",
        :one_third     => "one third",
        :two_thirds    => "two thirds",
        :one_fourth    => "one fourth",
        :three_fourths => "three fourths",
        :one_fifth     => "one fifth",
        :two_fifths    => "two fifths",
        :three_fifths  => "three fifths",
        :four_fifths   => "four fifths",
        :one_sixth     => "one sixth",
        :five_sixths   => "five sixths",
        :one_eighth    => "one eighth",
        :three_eighths => "three eighths",
        :five_eighths  => "five eighths",
        :seven_eighths => "seven eighths",
      }

      class << self
        # Define one module-level reader per conversion type: the reader
        # +foo_bar+ returns the constant +FOO_BAR+.
        %w{characters currencies html_entities transliterations vulgar_fractions}.each do |conversion_type|
          define_method conversion_type do
            const_get conversion_type.upcase
          end
        end
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
module Stringex
  # Adds Stringex's bundled locale files to I18n's load path during Rails
  # initialization. Only the locales listed in
  # <tt>config.i18n.available_locales</tt> are loaded; when that setting is
  # blank, every bundled locale file is loaded.
  class Railtie < ::Rails::Railtie #:nodoc:
    initializer "stringex" do |app|
      available = app.config.i18n.available_locales
      # Glob alternation such as "{da,en}", or "*" when nothing is configured.
      glob = if available.blank?
        "*"
      else
        "{#{available.join(',')}}"
      end
      locale_files = Dir[File.join(Stringex::Localization::Backend::I18n::LOAD_PATH_BASE, "#{glob}.yml")]
      I18n.load_path.concat(locale_files)
    end
  end
end
@@ -1,236 +1,181 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  module Stringex
4
- # These methods are all added on String class.
5
4
  module StringExtensions
6
- def self.included(base) # :nodoc:
7
- base.extend(ClassMethods)
8
- end
5
+ # These methods are all included into the String class.
6
+ module PublicInstanceMethods
7
# Removes the specified character from the beginning and end of the string and then
# performs <tt>String#squeeze(character)</tt>, condensing runs of the character within
# the string.
#
# The character is matched literally (regexp metacharacters such as "." are escaped),
# and only the very start and very end of the string are trimmed, even when the string
# spans several lines.
#
# Note: This method has been superseded by ActiveSupport's squish method.
def collapse(character = " ")
  literal = Regexp.escape(character)
  sub(/\A#{literal}*/, "").sub(/#{literal}*\z/, "").squeeze(character)
end
9
14
 
10
- # Returns the string converted (via Textile/RedCloth) to HTML format
11
- # or self [with a friendly warning] if Redcloth is not available.
12
- #
13
- # Using <tt>:lite</tt> argument will cause RedCloth to not wrap the HTML in a container
14
- # P element, which is useful behavior for generating header element text, etc.
15
- # This is roughly equivalent to ActionView's <tt>textilize_without_paragraph</tt>
16
- # except that it makes RedCloth do all the work instead of just gsubbing the return
17
- # from RedCloth.
18
- def to_html(lite_mode = false)
19
- if defined?(RedCloth)
20
- if lite_mode
21
- RedCloth.new(self, [:lite_mode]).to_html
22
- else
23
- if self =~ /<pre>/
24
- RedCloth.new(self).to_html.tr("\t", "")
25
- else
26
- RedCloth.new(self).to_html.tr("\t", "").gsub(/\n\n/, "")
27
- end
15
+ # Converts HTML entities into the respective non-accented letters. Examples:
16
+ #
17
+ # "&aacute;".convert_accented_html_entities # => "a"
18
+ # "&ccedil;".convert_accented_html_entities # => "c"
19
+ # "&egrave;".convert_accented_html_entities # => "e"
20
+ # "&icirc;".convert_accented_html_entities # => "i"
21
+ # "&oslash;".convert_accented_html_entities # => "o"
22
+ # "&uuml;".convert_accented_html_entities # => "u"
23
+ #
24
+ # Note: This does not do any conversion of Unicode/ASCII accented-characters. For that
25
+ # functionality please use <tt>to_ascii</tt>.
26
+ def convert_accented_html_entities
27
+ stringex_convert do
28
+ cleanup_accented_html_entities!
28
29
  end
29
- else
30
- warn "String#to_html was called without RedCloth being successfully required"
31
- self
32
30
  end
33
- end
34
31
 
35
- # Create a URI-friendly representation of the string. This is used internally by
36
- # acts_as_url[link:classes/Stringex/ActsAsUrl/ClassMethods.html#M000012]
37
- # but can be called manually in order to generate an URI-friendly version of any string.
38
- def to_url(options = {})
39
- return self if options[:exclude] && options[:exclude].include?(self)
40
- remove_formatting(options).downcase.replace_whitespace("-").collapse("-").limit(options[:limit])
41
- end
32
# Converts various common plaintext characters to a more URI-friendly representation.
# Examples:
#
#   "foo & bar".convert_miscellaneous_characters # => "foo and bar"
#   "Chanel #9".convert_miscellaneous_characters # => "Chanel number 9"
#   "user@host".convert_miscellaneous_characters # => "user at host"
#   "google.com".convert_miscellaneous_characters # => "google dot com"
#   "$10".convert_miscellaneous_characters # => "10 dollars"
#   "*69".convert_miscellaneous_characters # => "star 69"
#   "100%".convert_miscellaneous_characters # => "100 percent"
#   "windows/mac/linux".convert_miscellaneous_characters # => "windows slash mac slash linux"
#
# It allows localization of conversions so you can use it to convert characters into your own language.
# Example:
#
#   I18n.backend.store_translations :de, { :stringex => { :characters => { :and => "und" } } }
#   I18n.locale = :de
#   "ich & dich".convert_miscellaneous_characters # => "ich und dich"
#
# The +options+ hash is passed through to the conversion so that settings such as
# <tt>:allow_slash</tt> (leave slashes untouched) take effect.
#
# Note: Because this method will convert any & symbols to the string "and",
# you should run any methods which convert HTML entities (convert_accented_html_entities and convert_miscellaneous_html_entities)
# before running this method.
def convert_miscellaneous_characters(options = {})
  stringex_convert(options) do
    translate! :ellipses, :currencies, :abbreviations, :characters, :apostrophes
    cleanup_characters!
  end
end
42
60
 
43
- # Returns the string limited in size to the value of limit.
44
- def limit(limit = nil)
45
- limit.nil? ? self : self[0...limit]
46
- end
61
+ # Converts HTML entities (taken from common Textile/RedCloth formattings) into plain text formats.
62
+ #
63
+ # Note: This isn't an attempt at complete conversion of HTML entities, just those most likely
64
+ # to be generated by Textile.
65
+ def convert_miscellaneous_html_entities
66
+ stringex_convert do
67
+ translate! :html_entities
68
+ cleanup_html_entities!
69
+ end
70
+ end
47
71
 
48
- # Performs multiple text manipulations. Essentially a shortcut for typing them all. View source
49
- # below to see which methods are run.
50
- def remove_formatting(options = {})
51
- strip_html_tags.
52
- convert_smart_punctuation.
53
- convert_accented_entities.
54
- convert_vulgar_fractions.
55
- convert_misc_entities.
56
- convert_misc_characters(options).
57
- to_ascii.
58
- # NOTE: String#to_ascii may convert some Unicode characters to ascii we'd already transliterated
59
- # so we need to do it again just to be safe
60
- convert_misc_characters(options).
61
- collapse
62
- end
72
+ # Converts MS Word 'smart punctuation' to ASCII
73
+ #
74
+ def convert_smart_punctuation
75
+ stringex_convert do
76
+ cleanup_smart_punctuation!
77
+ end
78
+ end
63
79
 
64
- # Removes HTML tags from text. This code is simplified from Tobias Luettke's regular expression
65
- # in Typo[http://typosphere.org].
66
- def strip_html_tags(leave_whitespace = false)
67
- name = /[\w:_-]+/
68
- value = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
69
- attr = /(#{name}(\s*=\s*#{value})?)/
70
- rx = /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
71
- (leave_whitespace) ? gsub(rx, "").strip : gsub(rx, "").gsub(/\s+/, " ").strip
72
- end
73
- # Converts HTML entities into the respective non-accented letters. Examples:
74
- #
75
- # "&aacute;".convert_accented_entities # => "a"
76
- # "&ccedil;".convert_accented_entities # => "c"
77
- # "&egrave;".convert_accented_entities # => "e"
78
- # "&icirc;".convert_accented_entities # => "i"
79
- # "&oslash;".convert_accented_entities # => "o"
80
- # "&uuml;".convert_accented_entities # => "u"
81
- #
82
- # Note: This does not do any conversion of Unicode/ASCII accented-characters. For that
83
- # functionality please use <tt>to_ascii</tt>.
84
- def convert_accented_entities
85
- gsub(/&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/, '\1').strip
86
- end
80
+ # Converts vulgar fractions from supported HTML entities and Unicode to plain text formats.
81
+ def convert_vulgar_fractions
82
+ stringex_convert do
83
+ translate! :vulgar_fractions
84
+ end
85
+ end
87
86
 
88
- # Converts HTML entities (taken from common Textile/RedCloth formattings) into plain text formats.
89
- #
90
- # Note: This isn't an attempt at complete conversion of HTML entities, just those most likely
91
- # to be generated by Textile.
92
- def convert_misc_entities
93
- dummy = dup
94
- {
95
- "#822[01]" => "\"",
96
- "#821[67]" => "'",
97
- "#8230" => "...",
98
- "#8211" => "-",
99
- "#8212" => "--",
100
- "#215" => "x",
101
- "gt" => ">",
102
- "lt" => "<",
103
- "(#8482|trade)" => "(tm)",
104
- "(#174|reg)" => "(r)",
105
- "(#169|copy)" => "(c)",
106
- "(#38|amp)" => "and",
107
- "nbsp" => " ",
108
- "(#162|cent)" => " cent",
109
- "(#163|pound)" => " pound",
110
- "(#188|frac14)" => "one fourth",
111
- "(#189|frac12)" => "half",
112
- "(#190|frac34)" => "three fourths",
113
- "(#247|divide)" => "divide",
114
- "(#176|deg)" => " degrees "
115
- }.each do |textiled, normal|
116
- dummy.gsub!(/&#{textiled};/, normal)
87
+ # Returns the string limited in size to the value of limit.
88
+ def limit(limit = nil)
89
+ limit.nil? ? self : self[0...limit]
117
90
  end
118
- dummy.gsub(/&[^;]+;/, "").strip
119
- end
120
91
 
121
- # Converts vulgar fractions from supported html entities and unicode to
122
- # plain text formats.
123
- def convert_vulgar_fractions
124
- dummy = dup
125
- {
126
- "(&#188;|&frac14;|¼)" => "one fourth",
127
- "(&#189;|&frac12;|½)" => "half",
128
- "(&#190;|&frac34;|¾)" => "three fourths",
129
- "(&#8531;|⅓)" => "one third",
130
- "(&#8532;|⅔)" => "two thirds",
131
- "(&#8533;|⅕)" => "one fifth",
132
- "(&#8534;|⅖)" => "two fifths",
133
- "(&#8535;|⅗)" => "three fifths",
134
- "(&#8536;|⅘)" => "four fifths",
135
- "(&#8537;|⅙)" => "one sixth",
136
- "(&#8538;|⅚)" => "five sixths",
137
- "(&#8539;|⅛)" => "one eighth",
138
- "(&#8540;|⅜)" => "three eighths",
139
- "(&#8541;|⅝)" => "five eighths",
140
- "(&#8542;|⅞)" => "seven eighths"
141
- }.each do |textiled, normal|
142
- dummy.gsub!(/#{textiled}/, normal)
92
+ # Performs multiple text manipulations. Essentially a shortcut for typing them all. View source
93
+ # below to see which methods are run.
94
+ def remove_formatting(options = {})
95
+ strip_html_tags.
96
+ convert_smart_punctuation.
97
+ convert_accented_html_entities.
98
+ convert_vulgar_fractions.
99
+ convert_miscellaneous_html_entities.
100
+ convert_miscellaneous_characters(options).
101
+ to_ascii.
102
+ # NOTE: String#to_ascii may convert some Unicode characters to ascii we'd already transliterated
103
+ # so we need to do it again just to be safe
104
+ convert_miscellaneous_characters(options).
105
+ collapse
143
106
  end
144
- dummy
145
- end
146
107
 
147
- # Converts MS Word 'smart punctuation' to ASCII
148
- #
149
- def convert_smart_punctuation
150
- dummy = dup
151
- {
152
-
153
- "(“|”|\302\223|\302\224|\303\222|\303\223)" => '"',
154
- "(‘|’|\302\221|\302\222|\303\225)" => "'",
155
- "…" => "...",
156
- }.each do |smart, normal|
157
- dummy.gsub!(/#{smart}/, normal)
108
+ # Replace runs of whitespace in string. Defaults to a single space but any replacement
109
+ # string may be specified as an argument. Examples:
110
+ #
111
+ # "Foo bar".replace_whitespace # => "Foo bar"
112
+ # "Foo bar".replace_whitespace("-") # => "Foo-bar"
113
+ def replace_whitespace(replacement = " ")
114
+ gsub(/\s+/, replacement)
158
115
  end
159
- dummy.strip
160
- end
161
116
 
162
- # Converts various common plaintext characters to a more URI-friendly representation.
163
- # Examples:
164
- #
165
- # "foo & bar".convert_misc_characters # => "foo and bar"
166
- # "Chanel #9".convert_misc_characters # => "Chanel number nine"
167
- # "user@host".convert_misc_characters # => "user at host"
168
- # "google.com".convert_misc_characters # => "google dot com"
169
- # "$10".convert_misc_characters # => "10 dollars"
170
- # "*69".convert_misc_characters # => "star 69"
171
- # "100%".convert_misc_characters # => "100 percent"
172
- # "windows/mac/linux".convert_misc_characters # => "windows slash mac slash linux"
173
- #
174
- # Note: Because this method will convert any & symbols to the string "and",
175
- # you should run any methods which convert HTML entities (convert_html_entities and convert_misc_entities)
176
- # before running this method.
177
- def convert_misc_characters(options = {})
178
- dummy = dup.gsub(/\.{3,}/, " dot dot dot ") # Catch ellipses before single dot rule!
179
- # Special rules for money
180
- {
181
- /(\s|^)\$(\d+)\.(\d+)(\s|$)/ => '\2 dollars \3 cents',
182
- /(\s|^)£(\d+)\.(\d+)(\s|$)/u => '\2 pounds \3 pence',
183
- }.each do |found, replaced|
184
- replaced = " #{replaced} " unless replaced =~ /\\1/
185
- dummy.gsub!(found, replaced)
117
+ # Removes HTML tags from text.
118
+ # NOTE: This code is simplified from Tobias Luettke's regular expression in Typo[http://typosphere.org].
119
+ def strip_html_tags(leave_whitespace = false)
120
+ string = stringex_convert do
121
+ strip_html_tags!
122
+ end
123
+ leave_whitespace ? string : string.replace_whitespace(' ')
186
124
  end
187
- # Special rules for abbreviations
188
- dummy.gsub!(/(\s|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|$))/) do |x|
189
- x.gsub(".", "")
125
+
126
+ # Returns the string converted (via Textile/RedCloth) to HTML format
127
+ # or self [with a friendly warning] if Redcloth is not available.
128
+ #
129
+ # Using <tt>:lite</tt> argument will cause RedCloth to not wrap the HTML in a container
130
+ # P element, which is useful behavior for generating header element text, etc.
131
+ # This is roughly equivalent to ActionView's <tt>textilize_without_paragraph</tt>
132
+ # except that it makes RedCloth do all the work instead of just gsubbing the return
133
+ # from RedCloth.
134
+ def to_html(lite_mode = false)
135
+ if defined?(RedCloth)
136
+ if lite_mode
137
+ RedCloth.new(self, [:lite_mode]).to_html
138
+ else
139
+ if self =~ /<pre>/
140
+ RedCloth.new(self).to_html.tr("\t", "")
141
+ else
142
+ RedCloth.new(self).to_html.tr("\t", "").gsub(/\n\n/, "")
143
+ end
144
+ end
145
+ else
146
+ warn "String#to_html was called without RedCloth being successfully required"
147
+ self
148
+ end
190
149
  end
191
- # Back to normal rules
192
- misc_characters =
193
- {
194
- /\s*&\s*/ => "and",
195
- /\s*#/ => "number",
196
- /\s*@\s*/ => "at",
197
- /(\S|^)\.(\S)/ => '\1 dot \2',
198
- /(\s|^)\$(\d*)(\s|$)/ => '\2 dollars',
199
- /(\s|^)£(\d*)(\s|$)/u => '\2 pounds',
200
- /(\s|^)¥(\d*)(\s|$)/u => '\2 yen',
201
- /\s*\*\s*/ => "star",
202
- /\s*%\s*/ => "percent",
203
- /(\s*=\s*)/ => " equals ",
204
- /\s*\+\s*/ => "plus",
205
- /\s*÷\s*/ => "divide",
206
- /\s*°\s*/ => "degrees"
207
- }
208
- misc_characters[/\s*(\\|\/|/)\s*/] = 'slash' unless options[:allow_slash]
209
- misc_characters.each do |found, replaced|
210
- replaced = " #{replaced} " unless replaced =~ /\\1/
211
- dummy.gsub!(found, replaced)
150
+
151
# Create a URI-friendly representation of the string. This is used internally by
# acts_as_url[link:classes/Stringex/ActsAsUrl/ClassMethods.html#M000012]
# but can be called manually in order to generate a URI-friendly version of any string.
#
# +options+ are merged over the configured defaults. The keys read here are:
#
# <tt>:exclude</tt>:: collection of strings returned untouched
# <tt>:replace_whitespace_with</tt>:: token substituted for each run of whitespace;
#                                     leading, trailing and repeated tokens are collapsed
# <tt>:limit</tt>:: maximum length of the result
# <tt>:force_downcase</tt>:: pass +false+ to keep the original letter case
#
# The full options hash is also handed to remove_formatting.
def to_url(options = {})
  return self if options[:exclude] && options[:exclude].include?(self)
  options = stringex_default_options.merge(options)
  whitespace_replacement_token = options[:replace_whitespace_with]
  # Collapse on the token that was actually inserted. An empty token inserts nothing,
  # so fall back to "-" rather than handing collapse an empty pattern.
  collapse_character = whitespace_replacement_token.to_s.empty? ? "-" : whitespace_replacement_token
  url = remove_formatting(options).
    replace_whitespace(whitespace_replacement_token).
    collapse(collapse_character).
    limit(options[:limit])
  options[:force_downcase] == false ? url : url.downcase
end
213
- dummy = dummy.gsub(/(^|[[:alpha:]])'|`([[:alpha:]]|$)/, '\1\2').gsub(/[\.,:;()\[\]\/\?!\^'ʼ"_\|]/, " ").strip
214
- end
215
165
 
216
- # Replace runs of whitespace in string. Defaults to a single space but any replacement
217
- # string may be specified as an argument. Examples:
218
- #
219
- # "Foo bar".replace_whitespace # => "Foo bar"
220
- # "Foo bar".replace_whitespace("-") # => "Foo-bar"
221
- def replace_whitespace(replace = " ")
222
- gsub(/\s+/, replace)
223
- end
166
+ private
167
+
168
+ def stringex_convert(options = {}, &block)
169
+ Localization.convert self, options, &block
170
+ end
224
171
 
225
- # Removes specified character from the beginning and/or end of the string and then performs
226
- # <tt>String#squeeze(character)</tt>, condensing runs of the character within the string.
227
- #
228
- # Note: This method has been superceded by ActiveSupport's squish method.
229
- def collapse(character = " ")
230
- sub(/^#{character}*/, "").sub(/#{character}*$/, "").squeeze(character)
172
+ def stringex_default_options
173
+ Stringex::Configuration::StringExtensions.default_settings
174
+ end
231
175
  end
232
176
 
233
- module ClassMethods
177
+ # These methods are extended onto the String class itself.
178
+ module PublicClassMethods
234
179
  # Returns string of random characters with a length matching the specified limit. Excludes 0
235
180
  # to avoid confusion between 0 and O.
236
181
  def random(limit)