stringex 1.5.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +15 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +74 -0
  4. data/README.rdoc +22 -1
  5. data/Rakefile +46 -223
  6. data/VERSION +1 -0
  7. data/init.rb +1 -0
  8. data/lib/stringex.rb +11 -3
  9. data/lib/stringex/acts_as_url.rb +49 -97
  10. data/lib/stringex/acts_as_url/adapter.rb +26 -0
  11. data/lib/stringex/acts_as_url/adapter/active_record.rb +23 -0
  12. data/lib/stringex/acts_as_url/adapter/base.rb +188 -0
  13. data/lib/stringex/acts_as_url/adapter/data_mapper.rb +67 -0
  14. data/lib/stringex/acts_as_url/adapter/mongoid.rb +36 -0
  15. data/lib/stringex/configuration.rb +4 -0
  16. data/lib/stringex/configuration/acts_as_url.rb +44 -0
  17. data/lib/stringex/configuration/base.rb +58 -0
  18. data/lib/stringex/configuration/configurator.rb +25 -0
  19. data/lib/stringex/configuration/string_extensions.rb +19 -0
  20. data/lib/stringex/localization.rb +98 -0
  21. data/lib/stringex/localization/backend/i18n.rb +53 -0
  22. data/lib/stringex/localization/backend/internal.rb +51 -0
  23. data/lib/stringex/localization/conversion_expressions.rb +148 -0
  24. data/lib/stringex/localization/converter.rb +121 -0
  25. data/lib/stringex/localization/default_conversions.rb +88 -0
  26. data/lib/stringex/rails/railtie.rb +10 -0
  27. data/lib/stringex/string_extensions.rb +153 -208
  28. data/lib/stringex/unidecoder.rb +6 -101
  29. data/lib/stringex/unidecoder_data/x00.yml +1 -1
  30. data/lib/stringex/unidecoder_data/x02.yml +5 -5
  31. data/lib/stringex/unidecoder_data/x05.yml +1 -1
  32. data/lib/stringex/unidecoder_data/x06.yml +1 -1
  33. data/lib/stringex/unidecoder_data/x07.yml +3 -3
  34. data/lib/stringex/unidecoder_data/x09.yml +1 -1
  35. data/lib/stringex/unidecoder_data/x0e.yml +2 -2
  36. data/lib/stringex/unidecoder_data/x1f.yml +2 -2
  37. data/lib/stringex/unidecoder_data/x20.yml +1 -1
  38. data/lib/stringex/unidecoder_data/xfb.yml +1 -1
  39. data/lib/stringex/unidecoder_data/xff.yml +1 -1
  40. data/lib/stringex/version.rb +8 -0
  41. data/locales/da.yml +73 -0
  42. data/locales/en.yml +66 -0
  43. data/stringex.gemspec +77 -18
  44. data/test/acts_as_url/adapter/active_record.rb +72 -0
  45. data/test/acts_as_url/adapter/data_mapper.rb +82 -0
  46. data/test/acts_as_url/adapter/mongoid.rb +73 -0
  47. data/test/acts_as_url_configuration_test.rb +51 -0
  48. data/test/acts_as_url_integration_test.rb +271 -0
  49. data/test/localization/da_test.rb +117 -0
  50. data/test/localization/default_test.rb +113 -0
  51. data/test/localization/en_test.rb +117 -0
  52. data/test/localization_test.rb +123 -0
  53. data/test/redcloth_to_html_test.rb +37 -0
  54. data/test/string_extensions_test.rb +59 -91
  55. data/test/test_helper.rb +2 -0
  56. data/test/unicode_point_suite/basic_greek_test.rb +113 -0
  57. data/test/unicode_point_suite/basic_latin_test.rb +142 -0
  58. data/test/unicode_point_suite/codepoint_test_helper.rb +32 -0
  59. data/test/unidecoder/bad_localization.yml +1 -0
  60. data/test/unidecoder/localization.yml +4 -0
  61. data/test/unidecoder_test.rb +3 -5
  62. metadata +145 -37
  63. data/test/acts_as_url_test.rb +0 -272
@@ -1,5 +1,4 @@
1
1
  # encoding: UTF-8
2
- require "yaml"
3
2
 
4
3
  module Stringex
5
4
  module Unidecoder
@@ -7,15 +6,14 @@ module Stringex
7
6
  CODEPOINTS = Hash.new{|h, k|
8
7
  h[k] = YAML.load_file(File.join(File.expand_path(File.dirname(__FILE__)), "unidecoder_data", "#{k}.yml"))
9
8
  } unless defined?(CODEPOINTS)
10
- LOCAL_CODEPOINTS = Hash.new unless defined?(LOCAL_CODEPOINTS)
11
9
 
12
10
  class << self
13
11
  # Returns string with its UTF-8 characters transliterated to ASCII ones
14
12
  #
15
13
  # You're probably better off just using the added String#to_ascii
16
14
  def decode(string)
17
- string.gsub(/[^\x00-\x7f]/u) do |codepoint|
18
- if localized = local_codepoint(codepoint)
15
+ string.gsub(/[^\x00-\x00]/u) do |codepoint|
16
+ if localized = translate(codepoint)
19
17
  localized
20
18
  else
21
19
  begin
@@ -47,66 +45,12 @@ module Stringex
47
45
  "#{code_group(unpacked)}.yml (line #{grouped_point(unpacked) + 2})"
48
46
  end
49
47
 
50
- # Adds localized transliterations to Unidecoder
51
- def localize_from(hash_or_path_to_file)
52
- hash = if hash_or_path_to_file.is_a?(Hash)
53
- hash_or_path_to_file
54
- else
55
- YAML.load_file(hash_or_path_to_file)
56
- end
57
- verify_local_codepoints hash
58
- end
59
-
60
- # Returns locale for localized transliterations
61
- def locale
62
- if @locale
63
- @locale
64
- elsif defined?(I18n)
65
- I18n.locale
66
- else
67
- default_locale
68
- end
69
- end
70
-
71
- # Sets locale for localized transliterations
72
- def locale=(new_locale)
73
- @locale = new_locale
74
- end
75
-
76
- # Returns default locale for localized transliterations. NOTE: Will set @locale as well.
77
- def default_locale
78
- @default_locale ||= "en"
79
- @locale = @default_locale
80
- end
81
-
82
- # Sets the default locale for localized transliterations. NOTE: Will set @locale as well.
83
- def default_locale=(new_locale)
84
- @default_locale = new_locale
85
- # Seems logical that @locale should be the new default
86
- @locale = new_locale
87
- end
88
-
89
- # Returns the localized transliteration for a codepoint
90
- def local_codepoint(codepoint)
91
- locale_hash = LOCAL_CODEPOINTS[locale] || LOCAL_CODEPOINTS[locale.is_a?(Symbol) ? locale.to_s : locale.to_sym]
92
- locale_hash && locale_hash[codepoint]
93
- end
94
-
95
- # Runs a block with a temporary locale setting, returning the locale to the original state when complete
96
- def with_locale(new_locale, &block)
97
- new_locale = default_locale if new_locale == :default
98
- original_locale = locale
99
- self.locale = new_locale
100
- block.call
101
- self.locale = original_locale
102
- end
48
+ private
103
49
 
104
- # Runs a block with default locale
105
- def with_default_locale(&block)
106
- with_locale default_locale, &block
50
+ def translate(codepoint)
51
+ Localization.translate(:transliterations, codepoint)
107
52
  end
108
53
 
109
- private
110
54
  # Returns the Unicode codepoint grouping for the given character
111
55
  def code_group(unpacked_character)
112
56
  "x%02x" % (unpacked_character >> 8)
@@ -116,51 +60,12 @@ module Stringex
116
60
  def grouped_point(unpacked_character)
117
61
  unpacked_character & 255
118
62
  end
119
-
120
- # Checks LOCAL_CODEPOINTS's Hash is in the format we expect before assigning it and raises
121
- # instructive exception if not
122
- def verify_local_codepoints(hash)
123
- if !pass_check(hash)
124
- raise ArgumentError, "LOCAL_CODEPOINTS is not correctly defined. Please see the README for more information on how to correctly format this data."
125
- end
126
- hash.each{|k, v| LOCAL_CODEPOINTS[k] = v}
127
- end
128
-
129
- def pass_check(hash)
130
- return false if !hash.is_a?(Hash)
131
- hash.all?{|key, value| pass_check_key_and_value_test(key, value) }
132
- end
133
-
134
- def pass_check_key_and_value_test(key, value)
135
- # Fuck a duck, eh?
136
- return false unless [Symbol, String].include?(key.class)
137
- return false unless value.is_a?(Hash)
138
- value.all?{|k, v| k.is_a?(String) && v.is_a?(String)}
139
- end
140
- end
141
- end
142
-
143
- # Provide a simpler interface for localization implementations
144
- class << self
145
- %w{
146
- localize_from
147
- locale
148
- locale=
149
- default_locale
150
- default_locale=
151
- local_codepoint
152
- with_locale
153
- with_default_locale
154
- }.each do |name|
155
- define_method name do |*args, &block|
156
- Unidecoder.send name, *args, &block
157
- end
158
63
  end
159
64
  end
160
65
  end
161
66
 
162
67
  module Stringex
163
- module StringExtensions
68
+ module StringExtensions::PublicInstanceMethods
164
69
  # Returns string with its UTF-8 characters transliterated to ASCII ones. Example:
165
70
  #
166
71
  # "⠋⠗⠁⠝⠉⠑".to_ascii #=> "france"
@@ -95,7 +95,7 @@
95
95
  - ']'
96
96
  - '^'
97
97
  - _
98
- - '`'
98
+ - "'"
99
99
  - a
100
100
  - b
101
101
  - c
@@ -186,10 +186,10 @@
186
186
  - y
187
187
  - "'"
188
188
  - '"'
189
- - '`'
190
189
  - "'"
191
- - '`'
192
- - '`'
190
+ - "'"
191
+ - "'"
192
+ - "'"
193
193
  - "'"
194
194
  - '?'
195
195
  - '?'
@@ -202,14 +202,14 @@
202
202
  - "'"
203
203
  - '-'
204
204
  - /
205
- - '`'
205
+ - "'"
206
206
  - ','
207
207
  - _
208
208
  - \
209
209
  - /
210
210
  - ':'
211
211
  - .
212
- - '`'
212
+ - "'"
213
213
  - "'"
214
214
  - '^'
215
215
  - V
@@ -225,7 +225,7 @@
225
225
  - n
226
226
  - n
227
227
  - s
228
- - '`'
228
+ - "'"
229
229
  - p
230
230
  - p
231
231
  - ts
@@ -56,7 +56,7 @@
56
56
  - D
57
57
  - T
58
58
  - Z
59
- - '`'
59
+ - "'"
60
60
  - G
61
61
  - '[?]'
62
62
  - '[?]'
@@ -36,7 +36,7 @@
36
36
  - n
37
37
  - s
38
38
  - s
39
- - '`'
39
+ - "'"
40
40
  - p
41
41
  - p
42
42
  - S
@@ -64,7 +64,7 @@
64
64
  - u
65
65
  - o
66
66
  - ''
67
- - '`'
67
+ - "'"
68
68
  - "'"
69
69
  - ''
70
70
  - ''
@@ -161,7 +161,7 @@
161
161
  - d
162
162
  - t
163
163
  - z
164
- - '`'
164
+ - "'"
165
165
  - gh
166
166
  - q
167
167
  - w
@@ -82,7 +82,7 @@
82
82
  - AUM
83
83
  - "'"
84
84
  - "'"
85
- - '`'
85
+ - "'"
86
86
  - "'"
87
87
  - '[?]'
88
88
  - '[?]'
@@ -44,7 +44,7 @@
44
44
  - s
45
45
  - h
46
46
  - l
47
- - '`'
47
+ - "'"
48
48
  - h
49
49
  - '~'
50
50
  - a
@@ -172,7 +172,7 @@
172
172
  - s
173
173
  - h
174
174
  - '[?]'
175
- - '`'
175
+ - "'"
176
176
  - ''
177
177
  - '~'
178
178
  - a
@@ -238,7 +238,7 @@
238
238
  - R
239
239
  - '"`'
240
240
  - "\"'"
241
- - '`'
241
+ - "'"
242
242
  - '[?]'
243
243
  - '[?]'
244
244
  - o
@@ -253,4 +253,4 @@
253
253
  - O
254
254
  - O
255
255
  - "'"
256
- - '`'
256
+ - "'"
@@ -52,7 +52,7 @@
52
52
  - "'"
53
53
  - '"'
54
54
  - "'''"
55
- - '`'
55
+ - "'"
56
56
  - '``'
57
57
  - '```'
58
58
  - '^'
@@ -31,7 +31,7 @@
31
31
  - yi
32
32
  - ''
33
33
  - ay
34
- - '`'
34
+ - "'"
35
35
  - ''
36
36
  - d
37
37
  - h
@@ -63,7 +63,7 @@
63
63
  - ']'
64
64
  - '^'
65
65
  - _
66
- - '`'
66
+ - "'"
67
67
  - a
68
68
  - b
69
69
  - c
@@ -0,0 +1,8 @@
1
+ module Stringex
2
+ module Version
3
+ MAJOR = 1
4
+ MINOR = 5
5
+ PATCH = 0
6
+ STRING = "#{MAJOR}.#{MINOR}.#{PATCH}"
7
+ end
8
+ end
data/locales/da.yml ADDED
@@ -0,0 +1,73 @@
1
+ # NOTE: Some translation keys make use of matches from regular expressions
2
+ # to manipulate whitespace and order. Please consult the source code for
3
+ # Stringex::Localization::ConversionExpressions to see what those
4
+ # regular expressions look like if you need to manipulate the order
5
+ # differently than the usage below.
6
+ da:
7
+ stringex:
8
+ characters:
9
+ and: og
10
+ at: snabel-a
11
+ divide: divideret med
12
+ degrees: grader
13
+ dot: \1 punktum \2
14
+ ellipsis: prik prik prik
15
+ equals: lig med
16
+ number: nummer
17
+ percent: procent
18
+ plus: plus
19
+ slash: skråstreg
20
+ star: stjerne
21
+ currencies:
22
+ dollars: \1 dollars
23
+ dollars_cents: \1 dollars \2 cents
24
+ pounds: \1 pund
25
+ pounds_pence: \1 pund \2 pence
26
+ euros: \1 euro
27
+ euros_cents: \1 euro \2 cent
28
+ yen: \1 yen
29
+ html_entities:
30
+ amp: and
31
+ cent: " cents"
32
+ copy: (c)
33
+ deg: " grader "
34
+ divide: " divideret med "
35
+ double_quote: '"'
36
+ ellipsis: "..."
37
+ en_dash: "-"
38
+ em_dash: "--"
39
+ frac14: en fjerdedel
40
+ frac12: halv
41
+ frac34: tre fjerdedele
42
+ gt: ">"
43
+ lt: <
44
+ nbsp: " "
45
+ pound: " pund "
46
+ reg: (r)
47
+ single_quote: "'"
48
+ times: x
49
+ trade: (tm)
50
+ yen: " yen "
51
+ vulgar_fractions:
52
+ half: halv
53
+ one_third: en tredjedel
54
+ two_thirds: to tredjedele
55
+ one_fourth: en fjerdedel
56
+ three_fourths: tre fjerdedele
57
+ one_fifth: en femtedel
58
+ two_fifths: to femtedele
59
+ three_fifths: tre femtedele
60
+ four_fifths: fire femtedele
61
+ one_sixth: en sjettedel
62
+ five_sixths: fem sjettedele
63
+ one_eighth: en ottendedel
64
+ three_eighths: tre ottendedele
65
+ five_eighths: fem ottendedele
66
+ seven_eighths: syv ottendedele
67
+ transliterations:
68
+ Æ: AE
69
+ Ø: OE
70
+ Å: AA
71
+ æ: ae
72
+ ø: oe
73
+ å: aa
data/locales/en.yml ADDED
@@ -0,0 +1,66 @@
1
+ # NOTE: Some translation keys make use of matches from regular expressions
2
+ # to manipulate whitespace and order. Please consult the source code for
3
+ # Stringex::Localization::ConversionExpressions to see what those
4
+ # regular expressions look like if you need to manipulate the order
5
+ # differently than the usage below.
6
+ en:
7
+ stringex:
8
+ characters:
9
+ and: and
10
+ at: at
11
+ divide: divided by
12
+ degrees: degrees
13
+ dot: \1 dot \2
14
+ ellipsis: dot dot dot
15
+ equals: equals
16
+ number: number
17
+ percent: percent
18
+ plus: plus
19
+ slash: slash
20
+ star: star
21
+ currencies:
22
+ dollars: \1 dollars
23
+ dollars_cents: \1 dollars \2 cents
24
+ pounds: \1 pounds
25
+ pounds_pence: \1 pounds \2 pence
26
+ euros: \1 euros
27
+ euros_cents: \1 euros \2 cents
28
+ yen: \1 yen
29
+ html_entities:
30
+ amp: and
31
+ cent: " cents"
32
+ copy: (c)
33
+ deg: " degrees "
34
+ divide: " divided by "
35
+ double_quote: '"'
36
+ ellipsis: "..."
37
+ en_dash: "-"
38
+ em_dash: "--"
39
+ frac14: one fourth
40
+ frac12: half
41
+ frac34: three fourths
42
+ gt: ">"
43
+ lt: <
44
+ nbsp: " "
45
+ pound: " pounds "
46
+ reg: (r)
47
+ single_quote: "'"
48
+ times: x
49
+ trade: (tm)
50
+ yen: " yen "
51
+ vulgar_fractions:
52
+ half: half
53
+ one_third: one third
54
+ two_thirds: two thirds
55
+ one_fourth: one fourth
56
+ three_fourths: three fourths
57
+ one_fifth: one fifth
58
+ two_fifths: two fifths
59
+ three_fifths: three fifths
60
+ four_fifths: four fifths
61
+ one_sixth: one sixth
62
+ five_sixths: five sixths
63
+ one_eighth: one eighth
64
+ three_eighths: three eighths
65
+ five_eighths: five eighths
66
+ seven_eighths: seven eighths