kyanite 0.7.9 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.rdoc +3 -0
- data/README.rdoc +1 -1
- data/Rakefile.rb +1 -0
- data/lib/kyanite/array.rb +1 -7
- data/lib/kyanite/string/cast.rb +3 -9
- data/lib/kyanite/string/chars.rb +75 -6
- data/lib/kyanite/string/chars_const.rb +181 -47
- data/test/string/test_cast.rb +1 -5
- data/test/string/test_chars.rb +151 -23
- data/version.rb +1 -1
- metadata +18 -2
data/History.rdoc
CHANGED
data/README.rdoc
CHANGED
@@ -4,7 +4,7 @@ http://bklippstein.github.com/kyanite/frames.html
|
|
4
4
|
Welcome to Kyanite. It's a general toolbox like Facets or ActiveSupport.
|
5
5
|
|
6
6
|
== Features
|
7
|
-
[{String}]
|
7
|
+
[{String}] Better support for special letters like German umlauts. Reduce UTF8 strings to ASCII or less like humans would do, not just deleting the accents. Compare Strings. Parse nested brackets. Database Helpers.
|
8
8
|
[{Class}] Tools for reflection. Convert {Class} <=> {String} <=> {Symbol}.
|
9
9
|
[{Numeric} {Integer} {Float}] General tools.
|
10
10
|
[{Range}] Invert selection of an Array or String.
|
data/Rakefile.rb
CHANGED
@@ -30,6 +30,7 @@ $hoe = Hoe.spec Drumherum.project_name do
|
|
30
30
|
extra_deps << ['yard', '>= 0.8.3']
|
31
31
|
extra_deps << ['yard_klippstein_template', '>= 0.0.37']
|
32
32
|
extra_deps << ['hashery', '>= 2.0.1']
|
33
|
+
extra_deps << ['unicode_utils', '>= 1.4.0']
|
33
34
|
remote_rdoc_dir = '' # Release to root only one project
|
34
35
|
urls = [[Drumherum.url_docs], [Drumherum.url_source]]
|
35
36
|
|
data/lib/kyanite/array.rb
CHANGED
@@ -33,13 +33,7 @@ require 'kyanite/symbol' # size
|
|
33
33
|
class Array
|
34
34
|
|
35
35
|
|
36
|
-
|
37
|
-
# @return [String]
|
38
|
-
#
|
39
|
-
def to_s_utf8
|
40
|
-
self.pack("U*").encode('utf-8')
|
41
|
-
end
|
42
|
-
|
36
|
+
|
43
37
|
|
44
38
|
# Cuts the front portion, and returns the rest.
|
45
39
|
# If the remainder is only one element, it's not returned as an array but as a single element.
|
data/lib/kyanite/string/cast.rb
CHANGED
@@ -13,14 +13,7 @@ class String
|
|
13
13
|
|
14
14
|
|
15
15
|
# @!group Cast
|
16
|
-
|
17
|
-
# reverse of {Array#to_s_utf8}
|
18
|
-
# @return [Array]
|
19
|
-
#
|
20
|
-
def to_array_of_codepoints
|
21
|
-
self.codepoints.to_a
|
22
|
-
end
|
23
|
-
|
16
|
+
|
24
17
|
|
25
18
|
# Converts a string into the most plausible Identifier
|
26
19
|
#
|
@@ -118,9 +111,10 @@ class String
|
|
118
111
|
|
119
112
|
end
|
120
113
|
|
121
|
-
|
122
114
|
# @!endgroup
|
123
115
|
|
116
|
+
|
117
|
+
|
124
118
|
class NilClass
|
125
119
|
def to_identifier; nil; end
|
126
120
|
def to_integer; nil; end
|
data/lib/kyanite/string/chars.rb
CHANGED
@@ -6,10 +6,10 @@ if $0 == __FILE__
|
|
6
6
|
end
|
7
7
|
|
8
8
|
|
9
|
-
require 'kyanite/string/chars_const'
|
9
|
+
require 'kyanite/string/chars_const' unless defined? TR_FULL
|
10
10
|
require 'kyanite/string/misc'
|
11
|
+
require 'unicode_utils/nfkd'
|
11
12
|
|
12
|
-
|
13
13
|
|
14
14
|
|
15
15
|
class String
|
@@ -19,8 +19,62 @@ class String
|
|
19
19
|
# ---------------------------------------------------------------------------------------------------------------------------------
|
20
20
|
# @!group Clear / Format Text
|
21
21
|
# See TestKyaniteStringChars for tests and examples.
|
22
|
+
|
23
|
+
|
24
|
+
# Splits the string into an array of its characters.
|
25
|
+
# @return [Array]
|
26
|
+
#
|
27
|
+
def to_a
|
28
|
+
result = []
|
29
|
+
self.each_char do |c|
|
30
|
+
result << c
|
31
|
+
end
|
32
|
+
result
|
33
|
+
end
|
34
|
+
|
35
|
+
# reverse of {Array#to_s_utf8}
|
36
|
+
# @return [Array]
|
37
|
+
#
|
38
|
+
def to_array_of_codepoints
|
39
|
+
self.codepoints.to_a
|
40
|
+
end
|
41
|
+
|
42
|
+
# @return [Array]
|
43
|
+
def to_array_of_hex
|
44
|
+
self.unpack('U'*self.length).collect {|x| x.to_s 16}
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
# Reduces the string to an ASCII encoding. Example:
|
49
|
+
# ffi = "\uFB03"
|
50
|
+
# ix = "\u2168"
|
51
|
+
# high23="²³"
|
52
|
+
# high5 = "\u2075"
|
53
|
+
# all = ffi + ix + high23 + high5
|
54
|
+
# all.to_ascii
|
55
|
+
# => "ffiIX235"
|
56
|
+
#
|
57
|
+
# Based on +UnicodeUtils.nfkd+, but handles all characters from ISO/IEC 8859-1 and CP1252
|
58
|
+
# like humans do, not just deleting the accents. Example:
|
59
|
+
# "ÄÖÜäöüß".to_ascii
|
60
|
+
# => "AeOeUeaeoeuess"
|
61
|
+
#
|
62
|
+
# 1. Converts ÄÖÜäöüßàáâăäãāåạąæảấầắằ etc. to AeOeUeaeoeuessaaaaaaaaaaaaaaaa.
|
63
|
+
# 2. Then removes all non-Ascii-chars.
|
64
|
+
# 3. Then removes all non-printable Ascii-chars.
|
65
|
+
# 4. Caution: Newlines are also removed.
|
66
|
+
# About 10 times slower than {#reduce94 reduce94}, but more accurate.
|
67
|
+
#
|
68
|
+
def to_ascii
|
69
|
+
result = self.to_ascii_extra_chars
|
70
|
+
result.tr!(TR_FULL, TR_REDUCED) # not necessary, only for performance
|
71
|
+
return UnicodeUtils.nfkd(result).delete('^ -~') # delete is faster than gsub
|
72
|
+
end
|
73
|
+
|
74
|
+
|
22
75
|
|
23
76
|
# Reduces the string to a base94 encoding.
|
77
|
+
# About 10 times faster than with +UnicodeUtils+.
|
24
78
|
# 1. Converts àáâăäãāåạąæảấầắằÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰ etc. to aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAA.
|
25
79
|
# 2. Then removes all non-Ascii-chars.
|
26
80
|
# 3. Then removes all non-printable Ascii-chars.
|
@@ -74,7 +128,7 @@ class String
|
|
74
128
|
end
|
75
129
|
|
76
130
|
self.gsub!( 'ß', options[:german_sz] ) if options[:german_sz]
|
77
|
-
self.tr!('abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
|
131
|
+
self.tr!('abcdefghijklmnopqrstuvwxyz§', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ')
|
78
132
|
|
79
133
|
self.tr!(TR_FULL, TR_REDUCED.downcase)
|
80
134
|
unless options[:space]
|
@@ -186,7 +240,16 @@ class String
|
|
186
240
|
|
187
241
|
end
|
188
242
|
|
243
|
+
class Array
|
189
244
|
|
245
|
+
# reverse of {String#to_array_of_codepoints}
|
246
|
+
# @return [String]
|
247
|
+
#
|
248
|
+
def to_s_utf8
|
249
|
+
self.pack("U*").encode('utf-8')
|
250
|
+
end
|
251
|
+
|
252
|
+
end
|
190
253
|
|
191
254
|
|
192
255
|
if defined? TransparentNil
|
@@ -216,11 +279,17 @@ if $0 == __FILE__ then
|
|
216
279
|
|
217
280
|
#puts "Hallo"
|
218
281
|
# puts 'Scheiße'.reduce94(:german_sz => 'z')
|
219
|
-
test_down = 'àáâăäãāåạąæảấầắằабćĉčçċцчďðđдèéêěĕëēėęếеэфĝğġģгĥħхìíîĭïĩīıįijийĵюяķкĺľłļŀлмńňñņŋнòóôŏöõōøőơœопŕřŗрśŝšşсшщţťŧþтùúûŭüũūůűųưувŵýŷÿźżžжз'
|
220
|
-
test_up = 'ÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰАБĆĈČÇĊЦЧĎÐĐДÈÉÊĚĔËĒĖĘẾЕЭФĜĞĠĢГĤĦХÌÍÎĬÏĨĪİĮIJИЙĴЮЯĶКĹĽŁĻĿЛМŃŇÑŅŊНÒÓÔŎÖÕŌØŐƠŒОПŔŘŖРŚŜŠŞСШЩŢŤŦÞТÙÚÛŬÜŨŪŮŰŲƯУВŴÝŶŸŹŻŽЖЗ'
|
282
|
+
# test_down = 'àáâăäãāåạąæảấầắằабćĉčçċцчďðđдèéêěĕëēėęếеэфĝğġģгĥħхìíîĭïĩīıįijийĵюяķкĺľłļŀлмńňñņŋнòóôŏöõōøőơœопŕřŗрśŝšşсшщţťŧþтùúûŭüũūůűųưувŵýŷÿźżžжз'
|
283
|
+
# test_up = 'ÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰАБĆĈČÇĊЦЧĎÐĐДÈÉÊĚĔËĒĖĘẾЕЭФĜĞĠĢГĤĦХÌÍÎĬÏĨĪİĮIJИЙĴЮЯĶКĹĽŁĻĿЛМŃŇÑŅŊНÒÓÔŎÖÕŌØŐƠŒОПŔŘŖРŚŜŠŞСШЩŢŤŦÞТÙÚÛŬÜŨŪŮŰŲƯУВŴÝŶŸŹŻŽЖЗ'
|
221
284
|
|
222
|
-
puts "hallo".upcase!
|
285
|
+
# puts "hallo".upcase!
|
223
286
|
|
287
|
+
full = 'àáâăäãāåạąæảấầắằÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰ'
|
288
|
+
reduced = 'aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAA'
|
289
|
+
|
290
|
+
full.each_char do |c|
|
291
|
+
puts c.noaccents
|
292
|
+
end
|
224
293
|
|
225
294
|
end
|
226
295
|
|
@@ -3,40 +3,112 @@
|
|
3
3
|
if $0 == __FILE__
|
4
4
|
require 'drumherum'
|
5
5
|
smart_init
|
6
|
+
require 'perception'
|
6
7
|
end
|
7
8
|
|
8
|
-
|
9
9
|
require 'hashery'
|
10
|
+
require 'unicode_utils/char_type'
|
10
11
|
|
11
12
|
|
12
13
|
unless defined?(TR_UPCASE_ALL_REGEXP)
|
14
|
+
|
15
|
+
leerzeichen = "\u2420\u2423\u00a0\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u202f\u205f\u3000"
|
16
|
+
klammer_auf = "\u227a\u226a\u3008\u276c\u2329\u25c1\u25c0"
|
17
|
+
klammer_zu = "\u227b\u226b\u3009\u276d\u232a\u25b7\u25b6"
|
18
|
+
|
19
|
+
# Sowohl reduce94 als auch to_ascii werden diese Zeichen übersetzen.
|
20
|
+
# Zeichen, die TR_FULL ergänzen und die UnicodeUtils.nfkd nicht korrekt umsetzt.
|
21
|
+
tr_full_b = %q{£₤¢‹¥›•«×»÷‚‘ƒ’ˆ§´¡„¿“¦”†‡µ′″°¤∗·⋅} + leerzeichen + klammer_auf + klammer_zu
|
22
|
+
tr_reduced_b = %q{LLc"Y"*"*"/''f'^P'!"?"|"~~u'"~~***} + (" "*leerzeichen.length) + ("<"*klammer_auf.length) + (">"*klammer_zu.length)
|
23
|
+
|
24
|
+
# Nur to_ascii wird diese Zeichen übersetzen.
|
25
|
+
# Zeichen, die in TR_FULL schon drin sind und die UnicodeUtils.nfkd nicht korrekt umsetzt
|
26
|
+
tr_full_c = %q{ØøðđÐĐħĦıĸłŁŧþŦÞаАбБцчЦЧдДеэЕЭфФгГхХийИЙюяЮЯкКлЛмМнНоОпПрРсшщСШЩтТуУвВжзЖЗ}
|
27
|
+
tr_reduced_c = %q{OoddDDhHiklLttTTaAbBccCCdDeeEEfFgGhHiiIIjjJJkKlLmMnNoOpPrRsssSSStTuUvVzzZZ}
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
|
32
|
+
# Nur to_ascii wird diese Zeichen übersetzen.
|
33
|
+
TR_EXTRA_CHARS = [
|
34
|
+
[/ß/, 'ss'],
|
35
|
+
[/Ö/, 'Oe'],
|
36
|
+
[/Ü/, 'Ue'],
|
37
|
+
[/Ä/, 'Ae'],
|
38
|
+
[/ö/, 'oe'],
|
39
|
+
[/ü/, 'ue'],
|
40
|
+
[/ä/, 'ae'],
|
41
|
+
[/€/, 'EUR'],
|
42
|
+
[/æ/, 'ae'],
|
43
|
+
[/Æ/, 'AE'],
|
44
|
+
[/œ/, 'oe'],
|
45
|
+
[/Œ/, 'OE'],
|
46
|
+
[/ŋ/, 'nj'],
|
47
|
+
[/Ŋ/, 'NJ'],
|
48
|
+
[/Š/, 'Sh'],
|
49
|
+
[/š/, 'sh'],
|
50
|
+
[/Ž/, 'Zh'],
|
51
|
+
[/ž/, 'zh'],
|
52
|
+
[/Ḃ/, 'Bh'],
|
53
|
+
[/ḃ/, 'bh'],
|
54
|
+
[/Ċ/, 'Ch'],
|
55
|
+
[/ċ/, 'ch'],
|
56
|
+
[/Ḋ/, 'Dh'],
|
57
|
+
[/ḋ/, 'dh'],
|
58
|
+
[/Ḟ/, 'Fh'],
|
59
|
+
[/ḟ/, 'fh'],
|
60
|
+
[/Ġ/, 'Gh'],
|
61
|
+
[/ġ/, 'gh'],
|
62
|
+
[/Ṁ/, 'Mh'],
|
63
|
+
[/ṁ/, 'mh'],
|
64
|
+
[/Ṡ/, 'Sh'],
|
65
|
+
[/ṡ/, 'sh'],
|
66
|
+
[/Ṫ/, 'Th'],
|
67
|
+
[/ṫ/, 'th'],
|
68
|
+
[/©/, '(c)'],
|
69
|
+
[/®/, '(r)'],
|
70
|
+
[/≤/, '<='],
|
71
|
+
[/≥/, '>='],
|
72
|
+
[/±/, '+/-'],
|
73
|
+
[/¼/, '1/4'],
|
74
|
+
[/½/, '1/2'],
|
75
|
+
[/¾/, '3/4'],
|
76
|
+
[/‰/, '%%'],
|
77
|
+
[/˜/, '~'],
|
78
|
+
[/[¬−‐‑‒–—―─]/, '-'] # macht Ärger und muss am Ende bleiben
|
79
|
+
]
|
80
|
+
patterns = TR_EXTRA_CHARS.collect { |search, replace| search }
|
81
|
+
RE_EXTRA_CHARS = Regexp.union(*patterns)
|
82
|
+
|
83
|
+
|
84
|
+
|
13
85
|
base = Hashery::Dictionary.new
|
14
|
-
base['a'] = ' àáâă äãā åạą
|
15
|
-
base['A'] = ' ÀÁÂĂ ÄÃĀ ÅẠĄ
|
16
|
-
base['b'] = '
|
17
|
-
base['B'] = '
|
86
|
+
base['a'] = ' àáâă äãā åạą ảấầắằ а ª æ '
|
87
|
+
base['A'] = ' ÀÁÂĂ ÄÃĀ ÅẠĄ ẢẤẦẮẰ А ª Æ '
|
88
|
+
base['b'] = ' ḃб '
|
89
|
+
base['B'] = ' ḂБ '
|
18
90
|
base['c'] = ' ćĉč çċ цч '
|
19
91
|
base['C'] = ' ĆĈČ ÇĊ ЦЧ '
|
20
|
-
base['d'] = '
|
21
|
-
base['D'] = '
|
92
|
+
base['d'] = ' ḋď ðđ д '
|
93
|
+
base['D'] = ' ḊĎ ÐĐ Д '
|
22
94
|
base['e'] = ' èéêěĕ ëēėę ế еэ '
|
23
95
|
base['E'] = ' ÈÉÊĚĔ ËĒĖĘ Ế ЕЭ '
|
24
|
-
base['f'] = '
|
25
|
-
base['F'] = '
|
96
|
+
base['f'] = ' ḟф '
|
97
|
+
base['F'] = ' ḞФ '
|
26
98
|
base['g'] = ' ĝğġ ģ г '
|
27
99
|
base['G'] = ' ĜĞĠ Ģ Г '
|
28
100
|
base['h'] = ' ĥħ х '
|
29
101
|
base['H'] = ' ĤĦ Х '
|
30
|
-
base['i'] = ' ìíîĭ ïĩīı į
|
31
|
-
base['I'] = ' ÌÍÎĬ ÏĨĪİ Į
|
102
|
+
base['i'] = ' ìíîĭ ïĩīı į ий'
|
103
|
+
base['I'] = ' ÌÍÎĬ ÏĨĪİ Į ИЙ'
|
32
104
|
base['j'] = ' ĵ юя '
|
33
105
|
base['J'] = ' Ĵ ЮЯ '
|
34
106
|
base['k'] = ' ķĸ к '
|
35
107
|
base['K'] = ' Ķĸ К '
|
36
108
|
base['l'] = ' ĺ ľłļŀ л '
|
37
109
|
base['L'] = ' Ĺ ĽŁĻĿ Л '
|
38
|
-
base['m'] = '
|
39
|
-
base['M'] = '
|
110
|
+
base['m'] = ' ṁм '
|
111
|
+
base['M'] = ' ṀМ '
|
40
112
|
base['n'] = ' ńň ñņʼnŋ н '
|
41
113
|
base['N'] = ' ŃŇ ÑŅʼnŊ Н '
|
42
114
|
base['o'] = ' òóôŏ öõō øőơ œ о '
|
@@ -47,10 +119,10 @@ unless defined?(TR_UPCASE_ALL_REGEXP)
|
|
47
119
|
base['Q'] = nil
|
48
120
|
base['r'] = ' ŕř ŗ р '
|
49
121
|
base['R'] = ' ŔŘ Ŗ Р '
|
50
|
-
base['s'] = '
|
51
|
-
base['S'] = '
|
52
|
-
base['t'] = '
|
53
|
-
base['T'] = '
|
122
|
+
base['s'] = ' ṡśŝš ßş сшщ '
|
123
|
+
base['S'] = ' ṠŚŜŠ ߪ СШЩ '
|
124
|
+
base['t'] = ' ṫţťŧþ т '
|
125
|
+
base['T'] = ' ṪŢŤŦÞ Т '
|
54
126
|
base['u'] = ' ùúûŭ üũū ůűųư у '
|
55
127
|
base['U'] = ' ÙÚÛŬ ÜŨŪ ŮŰŲƯ У '
|
56
128
|
base['v'] = ' в'
|
@@ -131,51 +203,113 @@ TR_DOWNCASE_ONLY = tr_downcase_only
|
|
131
203
|
end
|
132
204
|
|
133
205
|
|
134
|
-
TR_UPCASE =
|
135
|
-
TR_DOWNCASE =
|
136
|
-
TR_FULL =
|
137
|
-
TR_REDUCED =
|
206
|
+
TR_UPCASE = tr_upcase2
|
207
|
+
TR_DOWNCASE = tr_downcase2
|
208
|
+
TR_FULL = tr_full2 + tr_full_b
|
209
|
+
TR_REDUCED = tr_reduced2 + tr_reduced_b
|
210
|
+
TR_FULL_TO_ASCII = tr_full_b + tr_full_c
|
211
|
+
TR_REDUCED_TO_ASCII = tr_reduced_b + tr_reduced_c
|
138
212
|
TR_UPCASE_ALL_REGEXP = /^[A-ZÀÁÂĂÄÃĀÅẠĄÆẢẤẦẮẰАБĆĈČÇĊЦЧĎÐĐДÈÉÊĚĔËĒĖĘẾЕЭФĜĞĠĢГĤĦХÌÍÎĬÏĨĪİĮIJИЙĴЮЯĶКĹĽŁĻĿЛМŃŇÑŅŊНÒÓÔŎÖÕŌØŐƠŒОПŔŘŖРŚŜŠŞСШЩŢŤŦÞТÙÚÛŬÜŨŪŮŰŲƯУВŴÝŶŸŹŻŽЖЗ]/
|
139
213
|
|
214
|
+
|
215
|
+
|
216
|
+
LANG_SPECIAL_CHARS = {
|
217
|
+
:german => ["ÄÖÜäöüß", "AeOeUeaeoeuess"],
|
218
|
+
:dutch => ["IJij", "IJij"],
|
219
|
+
:estonian => ["ŠšŽž", "ShshZhzh"],
|
220
|
+
:finnish => ["ŠšŽž", "ShshZhzh"],
|
221
|
+
:french => ["ŒœŸ", "OEoeY"],
|
222
|
+
:hungarian => ["ŐőŰű", "OoUu"],
|
223
|
+
:latin => ["ĀāĒēĪīŌōŪū","AaEeIiOoUu"],
|
224
|
+
:finnish => ["ĀāĒēĪīŌōŪū","AaEeIiOoUu"],
|
225
|
+
:turkish => ["İıĞğŞş", "IiGgSs"],
|
226
|
+
:welsh => ["ẀẁẂẃŴŵŶŷ", "WwWwWwYy"],
|
227
|
+
:irish => ["ḂḃĊċḊḋḞḟĠġṀṁṠṡṪṫ", "BhbhChchDhdhFhfhGhghMhmhShshThth"]
|
228
|
+
}
|
229
|
+
|
230
|
+
# :irish => ["ḂḃḊḋḞḟṀṁṠṡṪṫ", "BhbhChchDhdhFhfhGhghMhmhShshThth"]
|
231
|
+
|
232
|
+
|
233
|
+
|
140
234
|
end # unless defined?
|
141
235
|
|
142
236
|
|
237
|
+
class String
|
143
238
|
|
239
|
+
# @private
|
240
|
+
def to_ascii_extra_chars
|
241
|
+
result = tr(TR_FULL_TO_ASCII, TR_REDUCED_TO_ASCII)
|
242
|
+
result.gsub(RE_EXTRA_CHARS) do |match|
|
243
|
+
TR_EXTRA_CHARS.detect{ |search, replace| search =~ match}[1]
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
# @private
|
248
|
+
def to_ascii_minus
|
249
|
+
|
250
|
+
end
|
251
|
+
|
252
|
+
end # class
|
144
253
|
|
145
254
|
|
146
255
|
|
147
256
|
# -----------------------------------------------------------------------------------------
|
148
|
-
#
|
257
|
+
# TR_EXTRA_CHARS und TR_FULL manuell prüfen
|
149
258
|
#
|
150
259
|
if $0 == __FILE__ then
|
260
|
+
require 'kyanite/string/chars'
|
261
|
+
require 'kyanite/set'
|
151
262
|
|
152
|
-
|
153
|
-
puts TR_DOWNCASE_ONLY.inspect
|
154
|
-
|
155
|
-
|
156
|
-
# require 'perception'
|
157
|
-
# rawlog "\n----------------------------------------------------------\n\n"
|
158
263
|
|
159
|
-
# rawlog 'TR_DOWNCASE_ONLY= '
|
160
|
-
# rawlog TR_DOWNCASE_ONLY
|
161
|
-
# rawlog "\n"
|
162
|
-
|
163
|
-
# rawlog 'TR_FULL= '
|
164
|
-
# rawlog TR_FULL
|
165
|
-
# rawlog "\n"
|
166
|
-
# rawlog 'TR_REDUCED= '
|
167
|
-
# rawlog TR_REDUCED
|
168
|
-
# rawlog "\n"
|
169
|
-
|
170
|
-
# rawlog 'TR_UPCASE= '
|
171
|
-
# rawlog TR_UPCASE
|
172
|
-
# rawlog "\n"
|
173
|
-
# rawlog 'TR_DOWNCASE= '
|
174
|
-
# rawlog TR_DOWNCASE
|
175
|
-
# rawlog "\n"
|
176
|
-
|
177
|
-
|
178
264
|
|
265
|
+
# Überprüfe TR_EXTRA_CHARS
|
266
|
+
see
|
267
|
+
see "Überprüfe TR_EXTRA_CHARS"
|
268
|
+
see "========================"
|
269
|
+
see
|
270
|
+
see "defined in", "Dup if <>0", "Trivial?", "Hex Code", "Character", "reduce94", "to_ascii", "Klassifizierung"
|
271
|
+
startline = 14
|
272
|
+
i = 0
|
273
|
+
all = ""
|
274
|
+
TR_EXTRA_CHARS[0..-2].each do | a |
|
275
|
+
c = a[0].to_s[7]
|
276
|
+
all += c
|
277
|
+
see i+startline, # Definitionszeile
|
278
|
+
all.to_a.to_set.size-i-1, # Dup-Detector
|
279
|
+
(c.to_array_of_codepoints[0] <= 127 ? 'TRIVIAL':''), # Trivial-Detector
|
280
|
+
c.to_array_of_hex, # sein Code in HEX
|
281
|
+
c, # das Zeichen
|
282
|
+
c.reduce94, # was reduce94 daraus macht
|
283
|
+
c.to_ascii, # was to_ascii daraus macht
|
284
|
+
UnicodeUtils.char_type(c)
|
285
|
+
|
286
|
+
i+=1
|
287
|
+
end
|
288
|
+
|
289
|
+
# Überprüfe TR_FULL
|
290
|
+
see
|
291
|
+
see
|
292
|
+
see
|
293
|
+
see "Überprüfe TR_FULL"
|
294
|
+
see "================="
|
295
|
+
see
|
296
|
+
see "Nr", "Dup if <>0", "Trivial?", "Hex Code", "Character", "reduce94", "to_ascii", "Klassifizierung"
|
297
|
+
i = 0
|
298
|
+
all = ""
|
299
|
+
#TR_FULL_TO_ASCII.each_char do |c|
|
300
|
+
TR_FULL.each_char do |c|
|
301
|
+
all += c
|
302
|
+
see i,
|
303
|
+
all.to_a.to_set.size-i-1, # Dup-Detector
|
304
|
+
(c.to_array_of_codepoints[0] <= 127 ? 'TRIVIAL':''), # Trivial-Detector
|
305
|
+
c.to_array_of_hex, # sein Code in HEX
|
306
|
+
c, # das Zeichen
|
307
|
+
c.reduce94, # was reduce94 daraus macht
|
308
|
+
c.to_ascii, # was to_ascii daraus macht
|
309
|
+
UnicodeUtils.char_type(c)
|
310
|
+
|
311
|
+
i+=1
|
312
|
+
end
|
179
313
|
|
180
314
|
|
181
315
|
|
data/test/string/test_cast.rb
CHANGED
@@ -14,11 +14,7 @@ require 'kyanite/array'
|
|
14
14
|
# @!macro string
|
15
15
|
class TestKyaniteStringCast < UnitTest
|
16
16
|
|
17
|
-
|
18
|
-
test = "H¿llÛ"
|
19
|
-
assert_equal [72, 191, 108, 108, 219], test.to_array_of_codepoints
|
20
|
-
assert_equal test, [72, 191, 108, 108, 219].to_s_utf8
|
21
|
-
end
|
17
|
+
|
22
18
|
|
23
19
|
def test_to_nil
|
24
20
|
assert_equal 'e', 'e'.to_nil
|
data/test/string/test_chars.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
if $0 == __FILE__
|
4
4
|
require 'drumherum'
|
5
5
|
smart_init
|
6
|
+
require 'perception'
|
6
7
|
end
|
7
8
|
require 'drumherum/unit_test'
|
8
9
|
require 'kyanite/string/chars'
|
@@ -18,48 +19,175 @@ class TestKyaniteStringChars < UnitTest
|
|
18
19
|
# @!group clear / format text
|
19
20
|
#
|
20
21
|
|
22
|
+
def test_TR_EXTRA_CHARS
|
23
|
+
startline = 23 # Zeilennummer in der TR_EXTRA_CHARS definiert wird
|
24
|
+
i = 0
|
25
|
+
all = ""
|
26
|
+
TR_EXTRA_CHARS.each do | a |
|
27
|
+
c = a[0].to_s[7]
|
28
|
+
all += c
|
29
|
+
assert_equal 0, all.to_a.to_set.size-i-1, "TR_EXTRA_CHARS: Dup in Zeile #{i+startline} Zeichen #{c}"
|
30
|
+
#assert c.to_array_of_codepoints[0] > 127, "TR_EXTRA_CHARS: Trivialität in Zeile #{i+startline} Zeichen #{c}"
|
31
|
+
i+=1
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
def test_TR_FULL
|
37
|
+
assert_equal TR_FULL.length, TR_REDUCED.length
|
38
|
+
i = 0
|
39
|
+
all = ""
|
40
|
+
TR_FULL.each_char do | c |
|
41
|
+
r = TR_REDUCED[i]
|
42
|
+
all += c
|
43
|
+
#see "Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
|
44
|
+
assert_equal 0, all.to_a.to_set.size-i-1, "TR_FULL: Dup in Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
|
45
|
+
assert c.to_array_of_codepoints[0] > 127, "TR_FULL: Trivialität in Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
|
46
|
+
assert r.to_array_of_codepoints[0] <= 127, "TR_FULL: Zeichen Nr. #{i} Zeichen #{c} >> #{r} wird nicht in ASCII umgesetzt"
|
47
|
+
assert_equal c.reduce94, c.to_ascii[0]
|
48
|
+
i+=1
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
|
53
|
+
def test_TR_FULL_TO_ASCII
|
54
|
+
assert_equal TR_FULL_TO_ASCII.length, TR_REDUCED_TO_ASCII.length
|
55
|
+
i = 0
|
56
|
+
all = ""
|
57
|
+
TR_FULL_TO_ASCII.each_char do | c |
|
58
|
+
r = TR_REDUCED_TO_ASCII[i]
|
59
|
+
all += c
|
60
|
+
#see "Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
|
61
|
+
assert_equal 0, all.to_a.to_set.size-i-1, "TR_FULL_TO_ASCII: Dup in Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
|
62
|
+
assert c.to_array_of_codepoints[0] > 127, "TR_FULL_TO_ASCII: Trivialität in Zeichen Nr. #{i} Zeichen #{c} >> #{r}"
|
63
|
+
assert r.to_array_of_codepoints[0] <= 127, "TR_FULL_TO_ASCII: Zeichen Nr. #{i} Zeichen #{c} >> #{r} wird nicht in ASCII umgesetzt"
|
64
|
+
i+=1
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
|
69
|
+
def test_to_array_of_codepoints
|
70
|
+
test = "H¿llÛ"
|
71
|
+
assert_equal [72, 191, 108, 108, 219], test.to_array_of_codepoints
|
72
|
+
assert_equal test, [72, 191, 108, 108, 219].to_s_utf8
|
73
|
+
end
|
74
|
+
|
75
|
+
def test_to_array_of_hex
|
76
|
+
euro = "\u20ac"
|
77
|
+
ffi = "\uFB03"
|
78
|
+
ix = "\u2168"
|
79
|
+
high5 = "\u2075"
|
80
|
+
all = euro + ffi + ix + high5
|
81
|
+
assert_equal ["20ac", "fb03", "2168", "2075"], all.to_array_of_hex
|
82
|
+
end
|
83
|
+
|
21
84
|
|
22
85
|
|
23
|
-
def
|
24
|
-
full = '
|
25
|
-
reduced = '
|
26
|
-
assert_equal reduced, full.reduce94
|
86
|
+
def test_to_ascii_a
|
87
|
+
full = 'ªàáâăãāåạąảấầắằÀÁÂĂÃĀÅẠĄẢẤẦẮẰ'
|
88
|
+
reduced = 'aaaaaaaaaaaaaaaAAAAAAAAAAAAAA'
|
89
|
+
assert_equal reduced, full.reduce94
|
90
|
+
assert_equal reduced, full.to_ascii
|
27
91
|
end
|
28
92
|
|
29
|
-
def
|
30
|
-
full =
|
31
|
-
|
32
|
-
|
93
|
+
def test_to_ascii_b
|
94
|
+
full = 'ćĉčçċĆĈČÇĊďĎèéêěĕëēėęếÈÉÊĚĔËĒĖĘẾ'
|
95
|
+
reduced1 = 'cccccCCCCCdDeeeeeeeeeeEEEEEEEEEE'
|
96
|
+
reduced2 = 'ccccchCCCCChdDeeeeeeeeeeEEEEEEEEEE'
|
97
|
+
assert_equal reduced1, full.reduce94
|
98
|
+
assert_equal reduced2, full.to_ascii
|
33
99
|
end
|
34
100
|
|
35
|
-
def
|
36
|
-
full =
|
37
|
-
|
38
|
-
|
101
|
+
def test_to_ascii_c
|
102
|
+
full = 'ĝğġģĜĞĠĢĥĤìíîĭïĩīįÌÍÎĬÏĨĪİĮĵĴķĶĺľļŀĹĽĻĿ'
|
103
|
+
reduced1 = 'ggggGGGGhHiiiiiiiiIIIIIIIIIjJkKllllLLLL'
|
104
|
+
reduced2 = 'ggghgGGGhGhHiiiiiiiiIIIIIIIIIjJkKllllLLLL'
|
105
|
+
assert_equal reduced1, full.reduce94
|
106
|
+
assert_equal reduced2, full.to_ascii
|
39
107
|
end
|
40
108
|
|
41
|
-
def
|
42
|
-
full = '
|
43
|
-
reduced = '
|
109
|
+
def test_to_ascii_e
|
110
|
+
full = 'ńňñņʼnŃŇÑŅòóôŏõōőơÒÓÔŎÕŌŐƠ'
|
111
|
+
reduced = 'nnnnnNNNNooooooooOOOOOOOO'
|
44
112
|
assert_equal reduced, full.reduce94
|
113
|
+
assert_equal reduced, full.to_ascii
|
45
114
|
end
|
46
115
|
|
47
|
-
def
|
48
|
-
full =
|
49
|
-
|
50
|
-
|
116
|
+
def test_to_ascii_f
|
117
|
+
full = 'ŕřŗŔŘŖśŝšşŚŜŠŞţťŢŤùúûŭũūůűųưÙÚÛŬŨŪŮŰŲƯŵŴýŷÿÝŶŸźżžŹŻŽ'
|
118
|
+
reduced1 = 'rrrRRRssssSSSSttTTuuuuuuuuuuUUUUUUUUUUwWyyyYYYzzzZZZ'
|
119
|
+
reduced2 = 'rrrRRRssshsSSShSttTTuuuuuuuuuuUUUUUUUUUUwWyyyYYYzzzhZZZh'
|
120
|
+
assert_equal reduced1, full.reduce94
|
121
|
+
assert_equal reduced2, full.to_ascii
|
51
122
|
end
|
52
123
|
|
124
|
+
def test_to_ascii_zusammengesetzt
|
125
|
+
full = 'ijIJſ…'
|
126
|
+
reduced = 'ijIJs...'
|
127
|
+
assert_equal reduced, full.to_ascii
|
128
|
+
end
|
129
|
+
|
130
|
+
def test_to_ascii_same_same
|
131
|
+
same_same = '^!"$%&/()=?@*+~#<>|,;:.-_ {[]}\\'
|
132
|
+
assert_equal same_same, same_same.to_ascii
|
133
|
+
same_same = "'0123456789"
|
134
|
+
assert_equal same_same, same_same.to_ascii
|
135
|
+
same_same = 'abcdefghijklmnopqrstuvwxyz'
|
136
|
+
assert_equal same_same, same_same.to_ascii
|
137
|
+
same_same = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
138
|
+
assert_equal same_same, same_same.to_ascii
|
139
|
+
end
|
140
|
+
|
141
|
+
|
142
|
+
def test_to_ascii_same_same
|
143
|
+
full = '¯¨'
|
144
|
+
reduced = ' ' * full.length
|
145
|
+
assert_equal 2, full.length
|
146
|
+
assert_equal reduced, full.to_ascii
|
147
|
+
end
|
148
|
+
|
149
|
+
def test_to_ascii_s
|
150
|
+
ffi = "\uFB03"
|
151
|
+
ix = "\u2168"
|
152
|
+
high23="²³"
|
153
|
+
high5 = "\u2075"
|
154
|
+
full = ffi + ix + high23 + high5 + "€ßÖÜÄöüä"
|
155
|
+
reduced1 = "sOUAoua"
|
156
|
+
reduced2 = "ffiIX235EURssOeUeAeoeueae"
|
157
|
+
assert_equal reduced1, full.reduce94
|
158
|
+
assert_equal reduced2, full.to_ascii
|
159
|
+
end
|
160
|
+
|
161
|
+
def test_LANG_SPECIAL_CHARS
|
162
|
+
LANG_SPECIAL_CHARS .each do | lang, (full, reduced) |
|
163
|
+
#see lang, full, reduced, full.to_ascii, full.reduce94
|
164
|
+
assert_equal reduced, full.to_ascii
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
def test_spaces
|
169
|
+
spaces = "\u0020\u00a0\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u202f\u205f\u3000\u2420\u2423"
|
170
|
+
assert_equal spaces.to_ascii, " " * spaces.length
|
171
|
+
assert_equal spaces.reduce94, " " * spaces.length
|
172
|
+
end
|
173
|
+
|
174
|
+
|
175
|
+
def test_minus_signs
|
176
|
+
minus = "\u00ac\u2212\u2010\u2011\u2012\u2013\u2014\u2015\u2500"
|
177
|
+
assert_equal minus.to_ascii, "-" * minus.length
|
178
|
+
#assert_equal spaces.reduce94, " " * spaces.length
|
179
|
+
end
|
180
|
+
|
53
181
|
|
54
182
|
|
55
183
|
|
56
184
|
def test_reduce94_full
|
57
185
|
full = <<ENDOFSTRING
|
58
|
-
|
186
|
+
àáâăäãāåạąảấầắằÀÁÂĂÄÃĀÅẠĄẢẤẦẮẰćĉčçċĆĈČÇĊďðđĎÐĐèéêěĕëēėęếÈÉÊĚĔËĒĖĘẾĝğġģĜĞĠĢĥħĤĦìíîĭïĩīıįÌÍÎĬÏĨĪİĮĵĴķĶĺľłļŀĹĽŁĻĿńňñņʼnŋŃŇÑŅŊòóôŏöõōøőơœÒÓÔŎÖÕŌØŐƠŒŕřŗŔŘŖśŝšßşŚŜŠŞţťŧþŢŤŦÞùúûŭüũūůűųưÙÚÛŬÜŨŪŮŰŲƯŵŴýŷÿÝŶŸźżžŹŻŽ
|
59
187
|
ENDOFSTRING
|
60
188
|
|
61
189
|
reduced = <<ENDOFSTRING
|
62
|
-
|
190
|
+
aaaaaaaaaaaaaaaAAAAAAAAAAAAAAAcccccCCCCCdddDDDeeeeeeeeeeEEEEEEEEEEggggGGGGhhHHiiiiiiiiiIIIIIIIIIjJkKlllllLLLLLnnnnnnNNNNNoooooooooooOOOOOOOOOOOrrrRRRsssssSSSSttttTTTTuuuuuuuuuuuUUUUUUUUUUUwWyyyYYYzzzZZZ
|
63
191
|
ENDOFSTRING
|
64
192
|
|
65
193
|
full = full.chomp
|
@@ -160,8 +288,8 @@ ENDOFSTRING
|
|
160
288
|
|
161
289
|
|
162
290
|
def test_downcase_upcase
|
163
|
-
test_down = '
|
164
|
-
test_up = '
|
291
|
+
test_down = 'àáâăäãāåạąảấầắằабćĉčçċцчďðđдèéêěĕëēėęếеэфĝğġģгĥħхìíîĭïĩīıįийĵюяķкĺľłļŀлмńňñņŋнòóôŏöõōøőơœопŕřŗрśŝšşсшщţťŧþтùúûŭüũūůűųưувŵýŷÿźżžжз'
|
292
|
+
test_up = 'ÀÁÂĂÄÃĀÅẠĄẢẤẦẮẰАБĆĈČÇĊЦЧĎÐĐДÈÉÊĚĔËĒĖĘẾЕЭФĜĞĠĢГĤĦХÌÍÎĬÏĨĪİĮИЙĴЮЯĶКĹĽŁĻĿЛМŃŇÑŅŊНÒÓÔŎÖÕŌØŐƠŒОПŔŘŖРŚŜŠŞСШЩŢŤŦÞТÙÚÛŬÜŨŪŮŰŲƯУВŴÝŶŸŹŻŽЖЗ'
|
165
293
|
|
166
294
|
# Bescheid sagen, sobald Ruby oder ActiveSupport von sich aus funktionieren
|
167
295
|
assert_not_equal test_down, test_up.downcase
|
data/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kyanite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: drumherum
|
@@ -139,6 +139,22 @@ dependencies:
|
|
139
139
|
- - ! '>='
|
140
140
|
- !ruby/object:Gem::Version
|
141
141
|
version: 2.0.1
|
142
|
+
- !ruby/object:Gem::Dependency
|
143
|
+
name: unicode_utils
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ! '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 1.4.0
|
150
|
+
type: :runtime
|
151
|
+
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ! '>='
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: 1.4.0
|
142
158
|
- !ruby/object:Gem::Dependency
|
143
159
|
name: rdoc
|
144
160
|
requirement: !ruby/object:Gem::Requirement
|