stringex 1.5.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +15 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +74 -0
  4. data/README.rdoc +22 -1
  5. data/Rakefile +46 -223
  6. data/VERSION +1 -0
  7. data/init.rb +1 -0
  8. data/lib/stringex.rb +11 -3
  9. data/lib/stringex/acts_as_url.rb +49 -97
  10. data/lib/stringex/acts_as_url/adapter.rb +26 -0
  11. data/lib/stringex/acts_as_url/adapter/active_record.rb +23 -0
  12. data/lib/stringex/acts_as_url/adapter/base.rb +188 -0
  13. data/lib/stringex/acts_as_url/adapter/data_mapper.rb +67 -0
  14. data/lib/stringex/acts_as_url/adapter/mongoid.rb +36 -0
  15. data/lib/stringex/configuration.rb +4 -0
  16. data/lib/stringex/configuration/acts_as_url.rb +44 -0
  17. data/lib/stringex/configuration/base.rb +58 -0
  18. data/lib/stringex/configuration/configurator.rb +25 -0
  19. data/lib/stringex/configuration/string_extensions.rb +19 -0
  20. data/lib/stringex/localization.rb +98 -0
  21. data/lib/stringex/localization/backend/i18n.rb +53 -0
  22. data/lib/stringex/localization/backend/internal.rb +51 -0
  23. data/lib/stringex/localization/conversion_expressions.rb +148 -0
  24. data/lib/stringex/localization/converter.rb +121 -0
  25. data/lib/stringex/localization/default_conversions.rb +88 -0
  26. data/lib/stringex/rails/railtie.rb +10 -0
  27. data/lib/stringex/string_extensions.rb +153 -208
  28. data/lib/stringex/unidecoder.rb +6 -101
  29. data/lib/stringex/unidecoder_data/x00.yml +1 -1
  30. data/lib/stringex/unidecoder_data/x02.yml +5 -5
  31. data/lib/stringex/unidecoder_data/x05.yml +1 -1
  32. data/lib/stringex/unidecoder_data/x06.yml +1 -1
  33. data/lib/stringex/unidecoder_data/x07.yml +3 -3
  34. data/lib/stringex/unidecoder_data/x09.yml +1 -1
  35. data/lib/stringex/unidecoder_data/x0e.yml +2 -2
  36. data/lib/stringex/unidecoder_data/x1f.yml +2 -2
  37. data/lib/stringex/unidecoder_data/x20.yml +1 -1
  38. data/lib/stringex/unidecoder_data/xfb.yml +1 -1
  39. data/lib/stringex/unidecoder_data/xff.yml +1 -1
  40. data/lib/stringex/version.rb +8 -0
  41. data/locales/da.yml +73 -0
  42. data/locales/en.yml +66 -0
  43. data/stringex.gemspec +77 -18
  44. data/test/acts_as_url/adapter/active_record.rb +72 -0
  45. data/test/acts_as_url/adapter/data_mapper.rb +82 -0
  46. data/test/acts_as_url/adapter/mongoid.rb +73 -0
  47. data/test/acts_as_url_configuration_test.rb +51 -0
  48. data/test/acts_as_url_integration_test.rb +271 -0
  49. data/test/localization/da_test.rb +117 -0
  50. data/test/localization/default_test.rb +113 -0
  51. data/test/localization/en_test.rb +117 -0
  52. data/test/localization_test.rb +123 -0
  53. data/test/redcloth_to_html_test.rb +37 -0
  54. data/test/string_extensions_test.rb +59 -91
  55. data/test/test_helper.rb +2 -0
  56. data/test/unicode_point_suite/basic_greek_test.rb +113 -0
  57. data/test/unicode_point_suite/basic_latin_test.rb +142 -0
  58. data/test/unicode_point_suite/codepoint_test_helper.rb +32 -0
  59. data/test/unidecoder/bad_localization.yml +1 -0
  60. data/test/unidecoder/localization.yml +4 -0
  61. data/test/unidecoder_test.rb +3 -5
  62. metadata +145 -37
  63. data/test/acts_as_url_test.rb +0 -272
@@ -1,5 +1,4 @@
1
1
  # encoding: UTF-8
2
- require "yaml"
3
2
 
4
3
  module Stringex
5
4
  module Unidecoder
@@ -7,15 +6,14 @@ module Stringex
7
6
  CODEPOINTS = Hash.new{|h, k|
8
7
  h[k] = YAML.load_file(File.join(File.expand_path(File.dirname(__FILE__)), "unidecoder_data", "#{k}.yml"))
9
8
  } unless defined?(CODEPOINTS)
10
- LOCAL_CODEPOINTS = Hash.new unless defined?(LOCAL_CODEPOINTS)
11
9
 
12
10
  class << self
13
11
  # Returns string with its UTF-8 characters transliterated to ASCII ones
14
12
  #
15
13
  # You're probably better off just using the added String#to_ascii
16
14
  def decode(string)
17
- string.gsub(/[^\x00-\x7f]/u) do |codepoint|
18
- if localized = local_codepoint(codepoint)
15
+ string.gsub(/[^\x00-\x00]/u) do |codepoint|
16
+ if localized = translate(codepoint)
19
17
  localized
20
18
  else
21
19
  begin
@@ -47,66 +45,12 @@ module Stringex
47
45
  "#{code_group(unpacked)}.yml (line #{grouped_point(unpacked) + 2})"
48
46
  end
49
47
 
50
- # Adds localized transliterations to Unidecoder
51
- def localize_from(hash_or_path_to_file)
52
- hash = if hash_or_path_to_file.is_a?(Hash)
53
- hash_or_path_to_file
54
- else
55
- YAML.load_file(hash_or_path_to_file)
56
- end
57
- verify_local_codepoints hash
58
- end
59
-
60
- # Returns locale for localized transliterations
61
- def locale
62
- if @locale
63
- @locale
64
- elsif defined?(I18n)
65
- I18n.locale
66
- else
67
- default_locale
68
- end
69
- end
70
-
71
- # Sets locale for localized transliterations
72
- def locale=(new_locale)
73
- @locale = new_locale
74
- end
75
-
76
- # Returns default locale for localized transliterations. NOTE: Will set @locale as well.
77
- def default_locale
78
- @default_locale ||= "en"
79
- @locale = @default_locale
80
- end
81
-
82
- # Sets the default locale for localized transliterations. NOTE: Will set @locale as well.
83
- def default_locale=(new_locale)
84
- @default_locale = new_locale
85
- # Seems logical that @locale should be the new default
86
- @locale = new_locale
87
- end
88
-
89
- # Returns the localized transliteration for a codepoint
90
- def local_codepoint(codepoint)
91
- locale_hash = LOCAL_CODEPOINTS[locale] || LOCAL_CODEPOINTS[locale.is_a?(Symbol) ? locale.to_s : locale.to_sym]
92
- locale_hash && locale_hash[codepoint]
93
- end
94
-
95
- # Runs a block with a temporary locale setting, returning the locale to the original state when complete
96
- def with_locale(new_locale, &block)
97
- new_locale = default_locale if new_locale == :default
98
- original_locale = locale
99
- self.locale = new_locale
100
- block.call
101
- self.locale = original_locale
102
- end
48
+ private
103
49
 
104
- # Runs a block with default locale
105
- def with_default_locale(&block)
106
- with_locale default_locale, &block
50
+ def translate(codepoint)
51
+ Localization.translate(:transliterations, codepoint)
107
52
  end
108
53
 
109
- private
110
54
  # Returns the Unicode codepoint grouping for the given character
111
55
  def code_group(unpacked_character)
112
56
  "x%02x" % (unpacked_character >> 8)
@@ -116,51 +60,12 @@ module Stringex
116
60
  def grouped_point(unpacked_character)
117
61
  unpacked_character & 255
118
62
  end
119
-
120
- # Checks LOCAL_CODEPOINTS's Hash is in the format we expect before assigning it and raises
121
- # instructive exception if not
122
- def verify_local_codepoints(hash)
123
- if !pass_check(hash)
124
- raise ArgumentError, "LOCAL_CODEPOINTS is not correctly defined. Please see the README for more information on how to correctly format this data."
125
- end
126
- hash.each{|k, v| LOCAL_CODEPOINTS[k] = v}
127
- end
128
-
129
- def pass_check(hash)
130
- return false if !hash.is_a?(Hash)
131
- hash.all?{|key, value| pass_check_key_and_value_test(key, value) }
132
- end
133
-
134
- def pass_check_key_and_value_test(key, value)
135
- # Fuck a duck, eh?
136
- return false unless [Symbol, String].include?(key.class)
137
- return false unless value.is_a?(Hash)
138
- value.all?{|k, v| k.is_a?(String) && v.is_a?(String)}
139
- end
140
- end
141
- end
142
-
143
- # Provide a simpler interface for localization implementations
144
- class << self
145
- %w{
146
- localize_from
147
- locale
148
- locale=
149
- default_locale
150
- default_locale=
151
- local_codepoint
152
- with_locale
153
- with_default_locale
154
- }.each do |name|
155
- define_method name do |*args, &block|
156
- Unidecoder.send name, *args, &block
157
- end
158
63
  end
159
64
  end
160
65
  end
161
66
 
162
67
  module Stringex
163
- module StringExtensions
68
+ module StringExtensions::PublicInstanceMethods
164
69
  # Returns string with its UTF-8 characters transliterated to ASCII ones. Example:
165
70
  #
166
71
  # "⠋⠗⠁⠝⠉⠑".to_ascii #=> "france"
@@ -95,7 +95,7 @@
95
95
  - ']'
96
96
  - '^'
97
97
  - _
98
- - '`'
98
+ - "'"
99
99
  - a
100
100
  - b
101
101
  - c
@@ -186,10 +186,10 @@
186
186
  - y
187
187
  - "'"
188
188
  - '"'
189
- - '`'
190
189
  - "'"
191
- - '`'
192
- - '`'
190
+ - "'"
191
+ - "'"
192
+ - "'"
193
193
  - "'"
194
194
  - '?'
195
195
  - '?'
@@ -202,14 +202,14 @@
202
202
  - "'"
203
203
  - '-'
204
204
  - /
205
- - '`'
205
+ - "'"
206
206
  - ','
207
207
  - _
208
208
  - \
209
209
  - /
210
210
  - ':'
211
211
  - .
212
- - '`'
212
+ - "'"
213
213
  - "'"
214
214
  - '^'
215
215
  - V
@@ -225,7 +225,7 @@
225
225
  - n
226
226
  - n
227
227
  - s
228
- - '`'
228
+ - "'"
229
229
  - p
230
230
  - p
231
231
  - ts
@@ -56,7 +56,7 @@
56
56
  - D
57
57
  - T
58
58
  - Z
59
- - '`'
59
+ - "'"
60
60
  - G
61
61
  - '[?]'
62
62
  - '[?]'
@@ -36,7 +36,7 @@
36
36
  - n
37
37
  - s
38
38
  - s
39
- - '`'
39
+ - "'"
40
40
  - p
41
41
  - p
42
42
  - S
@@ -64,7 +64,7 @@
64
64
  - u
65
65
  - o
66
66
  - ''
67
- - '`'
67
+ - "'"
68
68
  - "'"
69
69
  - ''
70
70
  - ''
@@ -161,7 +161,7 @@
161
161
  - d
162
162
  - t
163
163
  - z
164
- - '`'
164
+ - "'"
165
165
  - gh
166
166
  - q
167
167
  - w
@@ -82,7 +82,7 @@
82
82
  - AUM
83
83
  - "'"
84
84
  - "'"
85
- - '`'
85
+ - "'"
86
86
  - "'"
87
87
  - '[?]'
88
88
  - '[?]'
@@ -44,7 +44,7 @@
44
44
  - s
45
45
  - h
46
46
  - l
47
- - '`'
47
+ - "'"
48
48
  - h
49
49
  - '~'
50
50
  - a
@@ -172,7 +172,7 @@
172
172
  - s
173
173
  - h
174
174
  - '[?]'
175
- - '`'
175
+ - "'"
176
176
  - ''
177
177
  - '~'
178
178
  - a
@@ -238,7 +238,7 @@
238
238
  - R
239
239
  - '"`'
240
240
  - "\"'"
241
- - '`'
241
+ - "'"
242
242
  - '[?]'
243
243
  - '[?]'
244
244
  - o
@@ -253,4 +253,4 @@
253
253
  - O
254
254
  - O
255
255
  - "'"
256
- - '`'
256
+ - "'"
@@ -52,7 +52,7 @@
52
52
  - "'"
53
53
  - '"'
54
54
  - "'''"
55
- - '`'
55
+ - "'"
56
56
  - '``'
57
57
  - '```'
58
58
  - '^'
@@ -31,7 +31,7 @@
31
31
  - yi
32
32
  - ''
33
33
  - ay
34
- - '`'
34
+ - "'"
35
35
  - ''
36
36
  - d
37
37
  - h
@@ -63,7 +63,7 @@
63
63
  - ']'
64
64
  - '^'
65
65
  - _
66
- - '`'
66
+ - "'"
67
67
  - a
68
68
  - b
69
69
  - c
@@ -0,0 +1,8 @@
1
+ module Stringex
2
+ module Version
3
+ MAJOR = 1
4
+ MINOR = 5
5
+ PATCH = 0
6
+ STRING = "#{MAJOR}.#{MINOR}.#{PATCH}"
7
+ end
8
+ end
data/locales/da.yml ADDED
@@ -0,0 +1,73 @@
1
+ # NOTE: Some translation keys make use of matches from regular expressions
2
+ # to manipulate whitespace and order. Please consult the source code for
3
+ # Stringex::Localization::ConversionExpressions to see what those
4
+ # regular expressions look like if you need to manipulate the order
5
+ # differently than the usage below.
6
+ da:
7
+ stringex:
8
+ characters:
9
+ and: og
10
+ at: snabel-a
11
+ divide: divideret med
12
+ degrees: grader
13
+ dot: \1 punktum \2
14
+ ellipsis: prik prik prik
15
+ equals: lig med
16
+ number: nummer
17
+ percent: procent
18
+ plus: plus
19
+ slash: skråstreg
20
+ star: stjerne
21
+ currencies:
22
+ dollars: \1 dollars
23
+ dollars_cents: \1 dollars \2 cents
24
+ pounds: \1 pund
25
+ pounds_pence: \1 pund \2 pence
26
+ euros: \1 euro
27
+ euros_cents: \1 euro \2 cent
28
+ yen: \1 yen
29
+ html_entities:
30
+ amp: and
31
+ cent: " cents"
32
+ copy: (c)
33
+ deg: " grader "
34
+ divide: " divideret med "
35
+ double_quote: '"'
36
+ ellipsis: "..."
37
+ en_dash: "-"
38
+ em_dash: "--"
39
+ frac14: en fjerdedel
40
+ frac12: halv
41
+ frac34: tre fjerdedele
42
+ gt: ">"
43
+ lt: <
44
+ nbsp: " "
45
+ pound: " pund "
46
+ reg: (r)
47
+ single_quote: "'"
48
+ times: x
49
+ trade: (tm)
50
+ yen: " yen "
51
+ vulgar_fractions:
52
+ half: halv
53
+ one_third: en tredjedel
54
+ two_thirds: to tredjedele
55
+ one_fourth: en fjerdedel
56
+ three_fourths: tre fjerdedele
57
+ one_fifth: en femtedel
58
+ two_fifths: to femtedele
59
+ three_fifths: tre femtedele
60
+ four_fifths: fire femtedele
61
+ one_sixth: en sjettedel
62
+ five_sixths: fem sjettedele
63
+ one_eighth: en ottendedel
64
+ three_eighths: tre ottendedele
65
+ five_eighths: fem ottendedele
66
+ seven_eighths: syv ottendedele
67
+ transliterations:
68
+ Æ: AE
69
+ Ø: OE
70
+ Å: AA
71
+ æ: ae
72
+ ø: oe
73
+ å: aa
data/locales/en.yml ADDED
@@ -0,0 +1,66 @@
1
+ # NOTE: Some translation keys make use of matches from regular expressions
2
+ # to manipulate whitespace and order. Please consult the source code for
3
+ # Stringex::Localization::ConversionExpressions to see what those
4
+ # regular expressions look like if you need to manipulate the order
5
+ # differently than the usage below.
6
+ en:
7
+ stringex:
8
+ characters:
9
+ and: and
10
+ at: at
11
+ divide: divided by
12
+ degrees: degrees
13
+ dot: \1 dot \2
14
+ ellipsis: dot dot dot
15
+ equals: equals
16
+ number: number
17
+ percent: percent
18
+ plus: plus
19
+ slash: slash
20
+ star: star
21
+ currencies:
22
+ dollars: \1 dollars
23
+ dollars_cents: \1 dollars \2 cents
24
+ pounds: \1 pounds
25
+ pounds_pence: \1 pounds \2 pence
26
+ euros: \1 euros
27
+ euros_cents: \1 euros \2 cents
28
+ yen: \1 yen
29
+ html_entities:
30
+ amp: and
31
+ cent: " cents"
32
+ copy: (c)
33
+ deg: " degrees "
34
+ divide: " divided by "
35
+ double_quote: '"'
36
+ ellipsis: "..."
37
+ en_dash: "-"
38
+ em_dash: "--"
39
+ frac14: one fourth
40
+ frac12: half
41
+ frac34: three fourths
42
+ gt: ">"
43
+ lt: <
44
+ nbsp: " "
45
+ pound: " pounds "
46
+ reg: (r)
47
+ single_quote: "'"
48
+ times: x
49
+ trade: (tm)
50
+ yen: " yen "
51
+ vulgar_fractions:
52
+ half: half
53
+ one_third: one third
54
+ two_thirds: two thirds
55
+ one_fourth: one fourth
56
+ three_fourths: three fourths
57
+ one_fifth: one fifth
58
+ two_fifths: two fifths
59
+ three_fifths: three fifths
60
+ four_fifths: four fifths
61
+ one_sixth: one sixth
62
+ five_sixths: five sixths
63
+ one_eighth: one eighth
64
+ three_eighths: three eighths
65
+ five_eighths: five eighths
66
+ seven_eighths: seven eighths