kyanite 0.7.9 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,6 @@
1
+ == 0.8.0 2012-11-17
2
+ * added String#to_ascii with human-like handling of unicode special characters
3
+
1
4
  == 0.7.5 2012-11-14
2
5
  * added FSymbol class
3
6
 
@@ -4,7 +4,7 @@ http://bklippstein.github.com/kyanite/frames.html
4
4
  Welcome to Kyanite. It's a general toolbox like Facets or ActiveSupport.
5
5
 
6
6
  == Features
7
- [{String}] Support for special letters like german umlauts. Reduce UTF8 strings to ASCII or less. Compare Strings. Parse nested brackets. Database Helpers.
7
+ [{String}] Better support for special letters like German umlauts. Reduce UTF8 strings to ASCII or less like humans would do, not just by deleting the accents. Compare Strings. Parse nested brackets. Database Helpers.
8
8
  [{Class}] Tools for reflection. Convert {Class} <=> {String} <=> {Symbol}.
9
9
  [{Numeric} {Integer} {Float}] General tools.
10
10
  [{Range}] Invert selection of an Array or String.
@@ -30,6 +30,7 @@ $hoe = Hoe.spec Drumherum.project_name do
30
30
  extra_deps << ['yard', '>= 0.8.3']
31
31
  extra_deps << ['yard_klippstein_template', '>= 0.0.37']
32
32
  extra_deps << ['hashery', '>= 2.0.1']
33
+ extra_deps << ['unicode_utils', '>= 1.4.0']
33
34
  remote_rdoc_dir = '' # Release to root only one project
34
35
  urls = [[Drumherum.url_docs], [Drumherum.url_source]]
35
36
 
@@ -33,13 +33,7 @@ require 'kyanite/symbol' # size
33
33
  class Array
34
34
 
35
35
 
36
- # reverse of {String#to_array_of_codepoints}
37
- # @return [String]
38
- #
39
- def to_s_utf8
40
- self.pack("U*").encode('utf-8')
41
- end
42
-
36
+
43
37
 
44
38
  # Cuts the front portion, and returns the rest.
45
39
  # If the remainder is only one element, it's not returned as an array but as a single element.
@@ -13,14 +13,7 @@ class String
13
13
 
14
14
 
15
15
  # @!group Cast
16
-
17
- # reverse of {Array#to_s_utf8}
18
- # @return [Array]
19
- #
20
- def to_array_of_codepoints
21
- self.codepoints.to_a
22
- end
23
-
16
+
24
17
 
25
18
  # Converts a string into the most plausible Identifier
26
19
  #
@@ -118,9 +111,10 @@ class String
118
111
 
119
112
  end
120
113
 
121
-
122
114
  # @!endgroup
123
115
 
116
+
117
+
124
118
  class NilClass
125
119
  def to_identifier; nil; end
126
120
  def to_integer; nil; end
@@ -6,10 +6,10 @@ if $0 == __FILE__
6
6
  end
7
7
 
8
8
 
9
- require 'kyanite/string/chars_const'
9
+ require 'kyanite/string/chars_const' unless defined? TR_FULL
10
10
  require 'kyanite/string/misc'
11
+ require 'unicode_utils/nfkd'
11
12
 
12
-
13
13
 
14
14
 
15
15
  class String
@@ -19,8 +19,62 @@ class String
19
19
  # ---------------------------------------------------------------------------------------------------------------------------------
20
20
  # @!group Clear / Format Text
21
21
  # See TestKyaniteStringChars for tests and examples.
22
+
23
+
24
+ # Splits the string into an array of its characters.
25
+ # @return [Array]
26
+ #
27
+ def to_a
28
+ result = []
29
+ self.each_char do |c|
30
+ result << c
31
+ end
32
+ result
33
+ end
34
+
35
+ # reverse of {Array#to_s_utf8}
36
+ # @return [Array]
37
+ #
38
+ def to_array_of_codepoints
39
+ self.codepoints.to_a
40
+ end
41
+
42
+ # @return [Array]
43
+ def to_array_of_hex
44
+ self.unpack('U'*self.length).collect {|x| x.to_s 16}
45
+ end
46
+
47
+
48
+ # Reduces the string to an ASCII encoding. Example:
49
+ # ffi = "\uFB03"
50
+ # ix = "\u2168"
51
+ # high23="²³"
52
+ # high5 = "\u2075"
53
+ # all = ffi + ix + high23 + high5
54
+ # all.to_ascii
55
+ # => "ffiIX235"
56
+ #
57
+ # Based on +UnicodeUtils.nfkd+, but handles all characters from ISO/IEC 8859-1 and CP1252
58
+ # like humans do, not just deleting the accents. Example:
59
+ # "ÄÖÜäöüß".to_ascii
60
+ # => "AeOeUeaeoeuess"
61
+ #
62
+ # 1. Converts ÄÖÜäöüßàáâăäãāåạąæảấầắằ etc. to AeOeUeaeoeuessaaaaaaaaaaaaaaaa.
63
+ # 2. Then removes all non-Ascii-chars.
64
+ # 3. Then removes all non-printable Ascii-chars.
65
+ # 4. Caution: Newlines are removed as well.
66
+ # About 10 times slower than {#reduce94 reduce94}, but more accurate.
67
+ #
68
+ def to_ascii
69
+ result = self.to_ascii_extra_chars
70
+ result.tr!(TR_FULL, TR_REDUCED) # not necessary, only for performance
71
+ return UnicodeUtils.nfkd(result).delete('^ -~') # delete is faster than gsub
72
+ end
73
+
74
+
22
75
 
23
76
  # Reduces the string to a base94 encoding.
77
+ # About 10 times faster than {#to_ascii}, which is based on +UnicodeUtils+.
24
78
  # 1. Converts àáâăäãāåạąæảấầắằÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰ etc. to aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAA.
25
79
  # 2. Then removes all non-Ascii-chars.
26
80
  # 3. Then removes all non-printable Ascii-chars.
@@ -74,7 +128,7 @@ class String
74
128
  end
75
129
 
76
130
  self.gsub!( 'ß', options[:german_sz] ) if options[:german_sz]
77
- self.tr!('abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
131
+ self.tr!('abcdefghijklmnopqrstuvwxyz§', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ')
78
132
 
79
133
  self.tr!(TR_FULL, TR_REDUCED.downcase)
80
134
  unless options[:space]
@@ -186,7 +240,16 @@ class String
186
240
 
187
241
  end
188
242
 
243
+ class Array
189
244
 
245
+ # reverse of {String#to_array_of_codepoints}
246
+ # @return [String]
247
+ #
248
+ def to_s_utf8
249
+ self.pack("U*").encode('utf-8')
250
+ end
251
+
252
+ end
190
253
 
191
254
 
192
255
  if defined? TransparentNil
@@ -216,11 +279,17 @@ if $0 == __FILE__ then
216
279
 
217
280
  #puts "Hallo"
218
281
  # puts 'Scheiße'.reduce94(:german_sz => 'z')
219
- test_down = 'àáâăäãāåạąæảấầắằабćĉčçċцчďðđдèéêěĕëēėęếеэфĝğġģгĥħхìíîĭïĩīıįijийĵюяķкĺľłļŀлмńňñņŋнòóôŏöõōøőơœопŕřŗрśŝšşсшщţťŧþтùúûŭüũūůűųưувŵýŷÿźżžжз'
220
- test_up = 'ÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰАБĆĈČÇĊЦЧĎÐĐДÈÉÊĚĔËĒĖĘẾЕЭФĜĞĠĢГĤĦХÌÍÎĬÏĨĪİĮIJИЙĴЮЯĶКĹĽŁĻĿЛМŃŇÑŅŊНÒÓÔŎÖÕŌØŐƠŒОПŔŘŖРŚŜŠŞСШЩŢŤŦÞТÙÚÛŬÜŨŪŮŰŲƯУВŴÝŶŸŹŻŽЖЗ'
282
+ # test_down = 'àáâăäãāåạąæảấầắằабćĉčçċцчďðđдèéêěĕëēėęếеэфĝğġģгĥħхìíîĭïĩīıįijийĵюяķкĺľłļŀлмńňñņŋнòóôŏöõōøőơœопŕřŗрśŝšşсшщţťŧþтùúûŭüũūůűųưувŵýŷÿźżžжз'
283
+ # test_up = 'ÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰАБĆĈČÇĊЦЧĎÐĐДÈÉÊĚĔËĒĖĘẾЕЭФĜĞĠĢГĤĦХÌÍÎĬÏĨĪİĮIJИЙĴЮЯĶКĹĽŁĻĿЛМŃŇÑŅŊНÒÓÔŎÖÕŌØŐƠŒОПŔŘŖРŚŜŠŞСШЩŢŤŦÞТÙÚÛŬÜŨŪŮŰŲƯУВŴÝŶŸŹŻŽЖЗ'
221
284
 
222
- puts "hallo".upcase!
285
+ # puts "hallo".upcase!
223
286
 
287
+ full = 'àáâăäãāåạąæảấầắằÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰ'
288
+ reduced = 'aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAA'
289
+
290
+ full.each_char do |c|
291
+ puts c.noaccents
292
+ end
224
293
 
225
294
  end
226
295
 
@@ -3,40 +3,112 @@
3
3
  if $0 == __FILE__
4
4
  require 'drumherum'
5
5
  smart_init
6
+ require 'perception'
6
7
  end
7
8
 
8
-
9
9
  require 'hashery'
10
+ require 'unicode_utils/char_type'
10
11
 
11
12
 
12
13
  unless defined?(TR_UPCASE_ALL_REGEXP)
14
+
15
+ leerzeichen = "\u2420\u2423\u00a0\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u202f\u205f\u3000"
16
+ klammer_auf = "\u227a\u226a\u3008\u276c\u2329\u25c1\u25c0"
17
+ klammer_zu = "\u227b\u226b\u3009\u276d\u232a\u25b7\u25b6"
18
+
19
+ # Both reduce94 and to_ascii translate these characters.
20
+ # Characters that supplement TR_FULL and that UnicodeUtils.nfkd does not convert correctly.
21
+ tr_full_b = %q{£₤¢‹¥›•«×»÷‚‘ƒ’ˆ§´¡„¿“¦”†‡µ′″°¤∗·⋅} + leerzeichen + klammer_auf + klammer_zu
22
+ tr_reduced_b = %q{LLc"Y"*"*"/''f'^P'!"?"|"~~u'"~~***} + (" "*leerzeichen.length) + ("<"*klammer_auf.length) + (">"*klammer_zu.length)
23
+
24
+ # Only to_ascii translates these characters.
25
+ # Characters that are already in TR_FULL and that UnicodeUtils.nfkd does not convert correctly.
26
+ tr_full_c = %q{ØøðđÐĐħĦıĸłŁŧþŦÞаАбБцчЦЧдДеэЕЭфФгГхХийИЙюяЮЯкКлЛмМнНоОпПрРсшщСШЩтТуУвВжзЖЗ}
27
+ tr_reduced_c = %q{OoddDDhHiklLttTTaAbBccCCdDeeEEfFgGhHiiIIjjJJkKlLmMnNoOpPrRsssSSStTuUvVzzZZ}
28
+
29
+
30
+
31
+
32
+ # Only to_ascii translates these characters.
33
+ TR_EXTRA_CHARS = [
34
+ [/ß/, 'ss'],
35
+ [/Ö/, 'Oe'],
36
+ [/Ü/, 'Ue'],
37
+ [/Ä/, 'Ae'],
38
+ [/ö/, 'oe'],
39
+ [/ü/, 'ue'],
40
+ [/ä/, 'ae'],
41
+ [/€/, 'EUR'],
42
+ [/æ/, 'ae'],
43
+ [/Æ/, 'AE'],
44
+ [/œ/, 'oe'],
45
+ [/Œ/, 'OE'],
46
+ [/ŋ/, 'nj'],
47
+ [/Ŋ/, 'NJ'],
48
+ [/Š/, 'Sh'],
49
+ [/š/, 'sh'],
50
+ [/Ž/, 'Zh'],
51
+ [/ž/, 'zh'],
52
+ [/Ḃ/, 'Bh'],
53
+ [/ḃ/, 'bh'],
54
+ [/Ċ/, 'Ch'],
55
+ [/ċ/, 'ch'],
56
+ [/Ḋ/, 'Dh'],
57
+ [/ḋ/, 'dh'],
58
+ [/Ḟ/, 'Fh'],
59
+ [/ḟ/, 'fh'],
60
+ [/Ġ/, 'Gh'],
61
+ [/ġ/, 'gh'],
62
+ [/Ṁ/, 'Mh'],
63
+ [/ṁ/, 'mh'],
64
+ [/Ṡ/, 'Sh'],
65
+ [/ṡ/, 'sh'],
66
+ [/Ṫ/, 'Th'],
67
+ [/ṫ/, 'th'],
68
+ [/©/, '(c)'],
69
+ [/®/, '(r)'],
70
+ [/≤/, '<='],
71
+ [/≥/, '>='],
72
+ [/±/, '+/-'],
73
+ [/¼/, '1/4'],
74
+ [/½/, '1/2'],
75
+ [/¾/, '3/4'],
76
+ [/‰/, '%%'],
77
+ [/˜/, '~'],
78
+ [/[¬−‐‑‒–—―─]/, '-'] # macht Ärger und muss am Ende bleiben
79
+ ]
80
+ patterns = TR_EXTRA_CHARS.collect { |search, replace| search }
81
+ RE_EXTRA_CHARS = Regexp.union(*patterns)
82
+
83
+
84
+
13
85
  base = Hashery::Dictionary.new
14
- base['a'] = ' àáâă äãā åạą æ ảấầắằ а '
15
- base['A'] = ' ÀÁÂĂ ÄÃĀ ÅẠĄ Æ ẢẤẦẮẰ А '
16
- base['b'] = ' б '
17
- base['B'] = ' Б '
86
+ base['a'] = ' àáâă äãā åạą ảấầắằ а ª æ '
87
+ base['A'] = ' ÀÁÂĂ ÄÃĀ ÅẠĄ ẢẤẦẮẰ А ª Æ '
88
+ base['b'] = ' ḃб '
89
+ base['B'] = ' ḂБ '
18
90
  base['c'] = ' ćĉč çċ цч '
19
91
  base['C'] = ' ĆĈČ ÇĊ ЦЧ '
20
- base['d'] = ' ď ðđ д '
21
- base['D'] = ' Ď ÐĐ Д '
92
+ base['d'] = ' ḋď ðđ д '
93
+ base['D'] = ' ḊĎ ÐĐ Д '
22
94
  base['e'] = ' èéêěĕ ëēėę ế еэ '
23
95
  base['E'] = ' ÈÉÊĚĔ ËĒĖĘ Ế ЕЭ '
24
- base['f'] = ' ф '
25
- base['F'] = ' Ф '
96
+ base['f'] = ' ḟф '
97
+ base['F'] = ' ḞФ '
26
98
  base['g'] = ' ĝğġ ģ г '
27
99
  base['G'] = ' ĜĞĠ Ģ Г '
28
100
  base['h'] = ' ĥħ х '
29
101
  base['H'] = ' ĤĦ Х '
30
- base['i'] = ' ìíîĭ ïĩīı į ij ий'
31
- base['I'] = ' ÌÍÎĬ ÏĨĪİ Į IJ ИЙ'
102
+ base['i'] = ' ìíîĭ ïĩīı į ий'
103
+ base['I'] = ' ÌÍÎĬ ÏĨĪİ Į ИЙ'
32
104
  base['j'] = ' ĵ юя '
33
105
  base['J'] = ' Ĵ ЮЯ '
34
106
  base['k'] = ' ķĸ к '
35
107
  base['K'] = ' Ķĸ К '
36
108
  base['l'] = ' ĺ ľłļŀ л '
37
109
  base['L'] = ' Ĺ ĽŁĻĿ Л '
38
- base['m'] = ' м '
39
- base['M'] = ' М '
110
+ base['m'] = ' ṁм '
111
+ base['M'] = ' ṀМ '
40
112
  base['n'] = ' ńň ñņʼnŋ н '
41
113
  base['N'] = ' ŃŇ ÑŅʼnŊ Н '
42
114
  base['o'] = ' òóôŏ öõō øőơ œ о '
@@ -47,10 +119,10 @@ unless defined?(TR_UPCASE_ALL_REGEXP)
47
119
  base['Q'] = nil
48
120
  base['r'] = ' ŕř ŗ р '
49
121
  base['R'] = ' ŔŘ Ŗ Р '
50
- base['s'] = ' śŝš ßş сшщ '
51
- base['S'] = ' ŚŜŠ ߪ СШЩ '
52
- base['t'] = ' ţťŧþ т '
53
- base['T'] = ' ŢŤŦÞ Т '
122
+ base['s'] = ' ṡśŝš ßş сшщ '
123
+ base['S'] = ' ṠŚŜŠ ߪ СШЩ '
124
+ base['t'] = ' ṫţťŧþ т '
125
+ base['T'] = ' ṪŢŤŦÞ Т '
54
126
  base['u'] = ' ùúûŭ üũū ůűųư у '
55
127
  base['U'] = ' ÙÚÛŬ ÜŨŪ ŮŰŲƯ У '
56
128
  base['v'] = ' в'
@@ -131,51 +203,113 @@ TR_DOWNCASE_ONLY = tr_downcase_only
131
203
  end
132
204
 
133
205
 
134
- TR_UPCASE = tr_upcase2
135
- TR_DOWNCASE = tr_downcase2
136
- TR_FULL = tr_full2
137
- TR_REDUCED = tr_reduced2
206
+ TR_UPCASE = tr_upcase2
207
+ TR_DOWNCASE = tr_downcase2
208
+ TR_FULL = tr_full2 + tr_full_b
209
+ TR_REDUCED = tr_reduced2 + tr_reduced_b
210
+ TR_FULL_TO_ASCII = tr_full_b + tr_full_c
211
+ TR_REDUCED_TO_ASCII = tr_reduced_b + tr_reduced_c
138
212
  TR_UPCASE_ALL_REGEXP = /^[A-ZÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰАБĆĈČÇĊЦЧĎÐĐДÈÉÊĚĔËĒĖĘẾЕЭФĜĞĠĢГĤĦХÌÍÎĬÏĨĪİĮIJИЙĴЮЯĶКĹĽŁĻĿЛМŃŇÑŅŊНÒÓÔŎÖÕŌØŐƠŒОПŔŘŖРŚŜŠŞСШЩŢŤŦÞТÙÚÛŬÜŨŪŮŰŲƯУВŴÝŶŸŹŻŽЖЗ]/
139
213
 
214
+
215
+
216
+ LANG_SPECIAL_CHARS = {
217
+ :german => ["ÄÖÜäöüß", "AeOeUeaeoeuess"],
218
+ :dutch => ["IJij", "IJij"],
219
+ :estonian => ["ŠšŽž", "ShshZhzh"],
220
+ :finnish => ["ŠšŽž", "ShshZhzh"],
221
+ :french => ["ŒœŸ", "OEoeY"],
222
+ :hungarian => ["ŐőŰű", "OoUu"],
223
+ :latin => ["ĀāĒēĪīŌōŪū","AaEeIiOoUu"],
224
+ :finnish => ["ĀāĒēĪīŌōŪū","AaEeIiOoUu"],
225
+ :turkish => ["İıĞğŞş", "IiGgSs"],
226
+ :welsh => ["ẀẁẂẃŴŵŶŷ", "WwWwWwYy"],
227
+ :irish => ["ḂḃĊċḊḋḞḟĠġṀṁṠṡṪṫ", "BhbhChchDhdhFhfhGhghMhmhShshThth"]
228
+ }
229
+
230
+ # :irish => ["ḂḃḊḋḞḟṀṁṠṡṪṫ", "BhbhChchDhdhFhfhGhghMhmhShshThth"]
231
+
232
+
233
+
140
234
  end # unless defined?
141
235
 
142
236
 
237
+ class String
143
238
 
239
+ # @private
240
+ def to_ascii_extra_chars
241
+ result = tr(TR_FULL_TO_ASCII, TR_REDUCED_TO_ASCII)
242
+ result.gsub(RE_EXTRA_CHARS) do |match|
243
+ TR_EXTRA_CHARS.detect{ |search, replace| search =~ match}[1]
244
+ end
245
+ end
246
+
247
+ # @private
248
+ def to_ascii_minus
249
+
250
+ end
251
+
252
+ end # class
144
253
 
145
254
 
146
255
 
147
256
  # -----------------------------------------------------------------------------------------
148
- # Ausprobieren
257
+ # Manually check TR_EXTRA_CHARS and TR_FULL
149
258
  #
150
259
  if $0 == __FILE__ then
260
+ require 'kyanite/string/chars'
261
+ require 'kyanite/set'
151
262
 
152
-
153
- puts TR_DOWNCASE_ONLY.inspect
154
-
155
-
156
- # require 'perception'
157
- # rawlog "\n----------------------------------------------------------\n\n"
158
263
 
159
- # rawlog 'TR_DOWNCASE_ONLY= '
160
- # rawlog TR_DOWNCASE_ONLY
161
- # rawlog "\n"
162
-
163
- # rawlog 'TR_FULL= '
164
- # rawlog TR_FULL
165
- # rawlog "\n"
166
- # rawlog 'TR_REDUCED= '
167
- # rawlog TR_REDUCED
168
- # rawlog "\n"
169
-
170
- # rawlog 'TR_UPCASE= '
171
- # rawlog TR_UPCASE
172
- # rawlog "\n"
173
- # rawlog 'TR_DOWNCASE= '
174
- # rawlog TR_DOWNCASE
175
- # rawlog "\n"
176
-
177
-
178
264
 
265
+ # Überprüfe TR_EXTRA_CHARS
266
+ see
267
+ see "Überprüfe TR_EXTRA_CHARS"
268
+ see "========================"
269
+ see
270
+ see "defined in", "Dup if <>0", "Trivial?", "Hex Code", "Character", "reduce94", "to_ascii", "Klassifizierung"
271
+ startline = 14
272
+ i = 0
273
+ all = ""
274
+ TR_EXTRA_CHARS[0..-2].each do | a |
275
+ c = a[0].to_s[7]
276
+ all += c
277
+ see i+startline, # Definitionszeile
278
+ all.to_a.to_set.size-i-1, # Dup-Detector
279
+ (c.to_array_of_codepoints[0] <= 127 ? 'TRIVIAL':''), # Trivial-Detector
280
+ c.to_array_of_hex, # sein Code in HEX
281
+ c, # das Zeichen
282
+ c.reduce94, # was reduce94 daraus macht
283
+ c.to_ascii, # was to_ascii daraus macht
284
+ UnicodeUtils.char_type(c)
285
+
286
+ i+=1
287
+ end
288
+
289
+ # Überprüfe TR_FULL
290
+ see
291
+ see
292
+ see
293
+ see "Überprüfe TR_FULL"
294
+ see "================="
295
+ see
296
+ see "Nr", "Dup if <>0", "Trivial?", "Hex Code", "Character", "reduce94", "to_ascii", "Klassifizierung"
297
+ i = 0
298
+ all = ""
299
+ #TR_FULL_TO_ASCII.each_char do |c|
300
+ TR_FULL.each_char do |c|
301
+ all += c
302
+ see i,
303
+ all.to_a.to_set.size-i-1, # Dup-Detector
304
+ (c.to_array_of_codepoints[0] <= 127 ? 'TRIVIAL':''), # Trivial-Detector
305
+ c.to_array_of_hex, # sein Code in HEX
306
+ c, # das Zeichen
307
+ c.reduce94, # was reduce94 daraus macht
308
+ c.to_ascii, # was to_ascii daraus macht
309
+ UnicodeUtils.char_type(c)
310
+
311
+ i+=1
312
+ end
179
313
 
180
314
 
181
315
 
@@ -14,11 +14,7 @@ require 'kyanite/array'
14
14
  # @!macro string
15
15
  class TestKyaniteStringCast < UnitTest
16
16
 
17
- def test_to_array_of_codepoints
18
- test = "H¿llÛ"
19
- assert_equal [72, 191, 108, 108, 219], test.to_array_of_codepoints
20
- assert_equal test, [72, 191, 108, 108, 219].to_s_utf8
21
- end
17
+
22
18
 
23
19
  def test_to_nil
24
20
  assert_equal 'e', 'e'.to_nil
@@ -3,6 +3,7 @@
3
3
  if $0 == __FILE__
4
4
  require 'drumherum'
5
5
  smart_init
6
+ require 'perception'
6
7
  end
7
8
  require 'drumherum/unit_test'
8
9
  require 'kyanite/string/chars'
@@ -18,48 +19,175 @@ class TestKyaniteStringChars < UnitTest
18
19
  # @!group clear / format text
19
20
  #
20
21
 
22
+ def test_TR_EXTRA_CHARS
23
+ startline = 23 # Zeilennummer in der TR_EXTRA_CHARS definiert wird
24
+ i = 0
25
+ all = ""
26
+ TR_EXTRA_CHARS.each do | a |
27
+ c = a[0].to_s[7]
28
+ all += c
29
+ assert_equal 0, all.to_a.to_set.size-i-1, "TR_EXTRA_CHARS: Dup in Zeile #{i+startline} Zeichen #{c}"
30
+ #assert c.to_array_of_codepoints[0] > 127, "TR_EXTRA_CHARS: Trivialität in Zeile #{i+startline} Zeichen #{c}"
31
+ i+=1
32
+ end
33
+ end
34
+
35
+
36
+ def test_TR_FULL
37
+ assert_equal TR_FULL.length, TR_REDUCED.length
38
+ i = 0
39
+ all = ""
40
+ TR_FULL.each_char do | c |
41
+ r = TR_REDUCED[i]
42
+ all += c
43
+ #see "Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
44
+ assert_equal 0, all.to_a.to_set.size-i-1, "TR_FULL: Dup in Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
45
+ assert c.to_array_of_codepoints[0] > 127, "TR_FULL: Trivialität in Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
46
+ assert r.to_array_of_codepoints[0] <= 127, "TR_FULL: Zeichen Nr. #{i} Zeichen #{c} >> #{r} wird nicht in ASCII umgesetzt"
47
+ assert_equal c.reduce94, c.to_ascii[0]
48
+ i+=1
49
+ end
50
+ end
51
+
52
+
53
+ def test_TR_FULL_TO_ASCII
54
+ assert_equal TR_FULL_TO_ASCII.length, TR_REDUCED_TO_ASCII.length
55
+ i = 0
56
+ all = ""
57
+ TR_FULL_TO_ASCII.each_char do | c |
58
+ r = TR_REDUCED_TO_ASCII[i]
59
+ all += c
60
+ #see "Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
61
+ assert_equal 0, all.to_a.to_set.size-i-1, "TR_FULL_TO_ASCII: Dup in Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
62
+ assert c.to_array_of_codepoints[0] > 127, "TR_FULL_TO_ASCII: Trivialität in Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
63
+ assert r.to_array_of_codepoints[0] <= 127, "TR_FULL_TO_ASCII: Zeichen Nr. #{i} Zeichen #{c} >> #{r} wird nicht in ASCII umgesetzt"
64
+ i+=1
65
+ end
66
+ end
67
+
68
+
69
+ def test_to_array_of_codepoints
70
+ test = "H¿llÛ"
71
+ assert_equal [72, 191, 108, 108, 219], test.to_array_of_codepoints
72
+ assert_equal test, [72, 191, 108, 108, 219].to_s_utf8
73
+ end
74
+
75
+ def test_to_array_of_hex
76
+ euro = "\u20ac"
77
+ ffi = "\uFB03"
78
+ ix = "\u2168"
79
+ high5 = "\u2075"
80
+ all = euro + ffi + ix + high5
81
+ assert_equal ["20ac", "fb03", "2168", "2075"], all.to_array_of_hex
82
+ end
83
+
21
84
 
22
85
 
23
- def test_reduce94_a
24
- full = 'àáâăäãāåạąæảấầắằÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰ'
25
- reduced = 'aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAA'
26
- assert_equal reduced, full.reduce94
86
+ def test_to_ascii_a
87
+ full = 'ªàáâăãāåạąảấầắằÀÁÂĂÃĀÅẠĄẢẤẦẮẰ'
88
+ reduced = 'aaaaaaaaaaaaaaaAAAAAAAAAAAAAA'
89
+ assert_equal reduced, full.reduce94
90
+ assert_equal reduced, full.to_ascii
27
91
  end
28
92
 
29
- def test_reduce94_b
30
- full = 'ćĉčçċĆĈČÇĊďðđĎÐĐèéêěĕëēėęếÈÉÊĚĔËĒĖĘẾ'
31
- reduced = 'cccccCCCCCdddDDDeeeeeeeeeeEEEEEEEEEE'
32
- assert_equal reduced, full.reduce94
93
+ def test_to_ascii_b
94
+ full = 'ćĉčçċĆĈČÇĊďĎèéêěĕëēėęếÈÉÊĚĔËĒĖĘẾ'
95
+ reduced1 = 'cccccCCCCCdDeeeeeeeeeeEEEEEEEEEE'
96
+ reduced2 = 'ccccchCCCCChdDeeeeeeeeeeEEEEEEEEEE'
97
+ assert_equal reduced1, full.reduce94
98
+ assert_equal reduced2, full.to_ascii
33
99
  end
34
100
 
35
- def test_reduce94_c
36
- full = 'ĝğġģĜĞĠĢĥħĤĦìíîĭïĩīıįijÌÍÎĬÏĨĪİĮIJĵĴķĸĶĺľłļŀĹĽŁĻĿ'
37
- reduced = 'ggggGGGGhhHHiiiiiiiiiiIIIIIIIIIIjJkkKlllllLLLLL'
38
- assert_equal reduced, full.reduce94
101
+ def test_to_ascii_c
102
+ full = 'ĝğġģĜĞĠĢĥĤìíîĭïĩīįÌÍÎĬÏĨĪİĮĵĴķĶĺľļŀĹĽĻĿ'
103
+ reduced1 = 'ggggGGGGhHiiiiiiiiIIIIIIIIIjJkKllllLLLL'
104
+ reduced2 = 'ggghgGGGhGhHiiiiiiiiIIIIIIIIIjJkKllllLLLL'
105
+ assert_equal reduced1, full.reduce94
106
+ assert_equal reduced2, full.to_ascii
39
107
  end
40
108
 
41
- def test_reduce94_e
42
- full = 'ńňñņʼnŋŃŇÑŅŊòóôŏöõōøőơœÒÓÔŎÖÕŌØŐƠŒ'
43
- reduced = 'nnnnnnNNNNNoooooooooooOOOOOOOOOOO'
109
+ def test_to_ascii_e
110
+ full = 'ńňñņʼnŃŇÑŅòóôŏõōőơÒÓÔŎÕŌŐƠ'
111
+ reduced = 'nnnnnNNNNooooooooOOOOOOOO'
44
112
  assert_equal reduced, full.reduce94
113
+ assert_equal reduced, full.to_ascii
45
114
  end
46
115
 
47
- def test_reduce94_f
48
- full = 'ŕřŗŔŘŖśŝšßşŚŜŠŞţťŧþŢŤŦÞùúûŭüũūůűųưÙÚÛŬÜŨŪŮŰŲƯŵŴýŷÿÝŶŸźżžŹŻŽ'
49
- reduced = 'rrrRRRsssssSSSSttttTTTTuuuuuuuuuuuUUUUUUUUUUUwWyyyYYYzzzZZZ'
50
- assert_equal reduced, full.reduce94
116
+ def test_to_ascii_f
117
+ full = 'ŕřŗŔŘŖśŝšşŚŜŠŞţťŢŤùúûŭũūůűųưÙÚÛŬŨŪŮŰŲƯŵŴýŷÿÝŶŸźżžŹŻŽ'
118
+ reduced1 = 'rrrRRRssssSSSSttTTuuuuuuuuuuUUUUUUUUUUwWyyyYYYzzzZZZ'
119
+ reduced2 = 'rrrRRRssshsSSShSttTTuuuuuuuuuuUUUUUUUUUUwWyyyYYYzzzhZZZh'
120
+ assert_equal reduced1, full.reduce94
121
+ assert_equal reduced2, full.to_ascii
51
122
  end
52
123
 
124
+ def test_to_ascii_zusammengesetzt
125
+ full = 'ijIJſ…'
126
+ reduced = 'ijIJs...'
127
+ assert_equal reduced, full.to_ascii
128
+ end
129
+
130
+ def test_to_ascii_same_same
131
+ same_same = '^!"$%&/()=?@*+~#<>|,;:.-_ {[]}\\'
132
+ assert_equal same_same, same_same.to_ascii
133
+ same_same = "'0123456789"
134
+ assert_equal same_same, same_same.to_ascii
135
+ same_same = 'abcdefghijklmnopqrstuvwxyz'
136
+ assert_equal same_same, same_same.to_ascii
137
+ same_same = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
138
+ assert_equal same_same, same_same.to_ascii
139
+ end
140
+
141
+
142
+ def test_to_ascii_same_same
143
+ full = '¯¨'
144
+ reduced = ' ' * full.length
145
+ assert_equal 2, full.length
146
+ assert_equal reduced, full.to_ascii
147
+ end
148
+
149
+ def test_to_ascii_s
150
+ ffi = "\uFB03"
151
+ ix = "\u2168"
152
+ high23="²³"
153
+ high5 = "\u2075"
154
+ full = ffi + ix + high23 + high5 + "€ßÖÜÄöüä"
155
+ reduced1 = "sOUAoua"
156
+ reduced2 = "ffiIX235EURssOeUeAeoeueae"
157
+ assert_equal reduced1, full.reduce94
158
+ assert_equal reduced2, full.to_ascii
159
+ end
160
+
161
+ def test_LANG_SPECIAL_CHARS
162
+ LANG_SPECIAL_CHARS .each do | lang, (full, reduced) |
163
+ #see lang, full, reduced, full.to_ascii, full.reduce94
164
+ assert_equal reduced, full.to_ascii
165
+ end
166
+ end
167
+
168
+ def test_spaces
169
+ spaces = "\u0020\u00a0\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u202f\u205f\u3000\u2420\u2423"
170
+ assert_equal spaces.to_ascii, " " * spaces.length
171
+ assert_equal spaces.reduce94, " " * spaces.length
172
+ end
173
+
174
+
175
+ def test_minus_signs
176
+ minus = "\u00ac\u2212\u2010\u2011\u2012\u2013\u2014\u2015\u2500"
177
+ assert_equal minus.to_ascii, "-" * minus.length
178
+ #assert_equal spaces.reduce94, " " * spaces.length
179
+ end
180
+
53
181
 
54
182
 
55
183
 
56
184
  def test_reduce94_full
57
185
  full = <<ENDOFSTRING
58
- àáâăäãāåạąæảấầắằÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰćĉčçċĆĈČÇĊďðđĎÐĐèéêěĕëēėęếÈÉÊĚĔËĒĖĘẾĝğġģĜĞĠĢĥħĤĦìíîĭïĩīıįijÌÍÎĬÏĨĪİĮIJĵĴķĶĺľłļŀĹĽŁĻĿńňñņʼnŋŃŇÑŅŊòóôŏöõōøőơœÒÓÔŎÖÕŌØŐƠŒŕřŗŔŘŖśŝšßşŚŜŠŞţťŧþŢŤŦÞùúûŭüũūůűųưÙÚÛŬÜŨŪŮŰŲƯŵŴýŷÿÝŶŸźżžŹŻŽ
186
+ àáâăäãāåạąảấầắằÀÁÂĂÄÃĀÅẠĄẢẤẦẮẰćĉčçċĆĈČÇĊďðđĎÐĐèéêěĕëēėęếÈÉÊĚĔËĒĖĘẾĝğġģĜĞĠĢĥħĤĦìíîĭïĩīıįÌÍÎĬÏĨĪİĮĵĴķĶĺľłļŀĹĽŁĻĿńňñņʼnŋŃŇÑŅŊòóôŏöõōøőơœÒÓÔŎÖÕŌØŐƠŒŕřŗŔŘŖśŝšßşŚŜŠŞţťŧþŢŤŦÞùúûŭüũūůűųưÙÚÛŬÜŨŪŮŰŲƯŵŴýŷÿÝŶŸźżžŹŻŽ
59
187
  ENDOFSTRING
60
188
 
61
189
  reduced = <<ENDOFSTRING
62
- aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAcccccCCCCCdddDDDeeeeeeeeeeEEEEEEEEEEggggGGGGhhHHiiiiiiiiiiIIIIIIIIIIjJkKlllllLLLLLnnnnnnNNNNNoooooooooooOOOOOOOOOOOrrrRRRsssssSSSSttttTTTTuuuuuuuuuuuUUUUUUUUUUUwWyyyYYYzzzZZZ
190
+ aaaaaaaaaaaaaaaAAAAAAAAAAAAAAAcccccCCCCCdddDDDeeeeeeeeeeEEEEEEEEEEggggGGGGhhHHiiiiiiiiiIIIIIIIIIjJkKlllllLLLLLnnnnnnNNNNNoooooooooooOOOOOOOOOOOrrrRRRsssssSSSSttttTTTTuuuuuuuuuuuUUUUUUUUUUUwWyyyYYYzzzZZZ
63
191
  ENDOFSTRING
64
192
 
65
193
  full = full.chomp
@@ -160,8 +288,8 @@ ENDOFSTRING
160
288
 
161
289
 
162
290
  def test_downcase_upcase
163
- test_down = 'àáâăäãāåạąæảấầắằабćĉčçċцчďðđдèéêěĕëēėęếеэфĝğġģгĥħхìíîĭïĩīıįijийĵюяķкĺľłļŀлмńňñņŋнòóôŏöõōøőơœопŕřŗрśŝšşсшщţťŧþтùúûŭüũūůűųưувŵýŷÿźżžжз'
164
- test_up = 'ÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰАБĆĈČÇĊЦЧĎÐĐДÈÉÊĚĔËĒĖĘẾЕЭФĜĞĠĢГĤĦХÌÍÎĬÏĨĪİĮIJИЙĴЮЯĶКĹĽŁĻĿЛМŃŇÑŅŊНÒÓÔŎÖÕŌØŐƠŒОПŔŘŖРŚŜŠŞСШЩŢŤŦÞТÙÚÛŬÜŨŪŮŰŲƯУВŴÝŶŸŹŻŽЖЗ'
291
+ test_down = 'àáâăäãāåạąảấầắằабćĉčçċцчďðđдèéêěĕëēėęếеэфĝğġģгĥħхìíîĭïĩīıįийĵюяķкĺľłļŀлмńňñņŋнòóôŏöõōøőơœопŕřŗрśŝšşсшщţťŧþтùúûŭüũūůűųưувŵýŷÿźżžжз'
292
+ test_up = 'ÀÁÂĂÄÃĀÅẠĄẢẤẦẮẰАБĆĈČÇĊЦЧĎÐĐДÈÉÊĚĔËĒĖĘẾЕЭФĜĞĠĢГĤĦХÌÍÎĬÏĨĪİĮИЙĴЮЯĶКĹĽŁĻĿЛМŃŇÑŅŊНÒÓÔŎÖÕŌØŐƠŒОПŔŘŖРŚŜŠŞСШЩŢŤŦÞТÙÚÛŬÜŨŪŮŰŲƯУВŴÝŶŸŹŻŽЖЗ'
165
293
 
166
294
  # Bescheid sagen, sobald Ruby oder ActiveSupport von sich aus funktionieren
167
295
  assert_not_equal test_down, test_up.downcase
data/version.rb CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Kyanite
4
4
 
5
- VERSION = '0.7.9'
5
+ VERSION = '0.8.0'
6
6
 
7
7
  end
8
8
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kyanite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.9
4
+ version: 0.8.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-14 00:00:00.000000000 Z
12
+ date: 2012-11-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: drumherum
@@ -139,6 +139,22 @@ dependencies:
139
139
  - - ! '>='
140
140
  - !ruby/object:Gem::Version
141
141
  version: 2.0.1
142
+ - !ruby/object:Gem::Dependency
143
+ name: unicode_utils
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ! '>='
148
+ - !ruby/object:Gem::Version
149
+ version: 1.4.0
150
+ type: :runtime
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ! '>='
156
+ - !ruby/object:Gem::Version
157
+ version: 1.4.0
142
158
  - !ruby/object:Gem::Dependency
143
159
  name: rdoc
144
160
  requirement: !ruby/object:Gem::Requirement