prawn-arabic 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/string_utf_support.rb +26 -22
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b44f2721e1423ee982bac9aecfe6deb099a0b4b0
|
4
|
+
data.tar.gz: 34c3c9e314e25ff1045e3a375bf82f95e2e5273f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6c60b592613bcb6f287c05f035865c79b00277731982cb4a6377358c017d1e4a51cb3cd45d124cbe1ebca36cd81ca2065ea86c92f27a408edffc70d330872462
|
7
|
+
data.tar.gz: a2ea78a47f545d3e636a10effa27c2c8d34a16c2ae1d1212113c43fb8acea8663fc98e2c7071384800505b7c553645878936e15a29bc2974eebbed586919aea1
|
data/lib/string_utf_support.rb
CHANGED
@@ -1,19 +1,20 @@
|
|
1
|
+
# encoding: ascii-8bit
|
1
2
|
class String
|
2
3
|
|
3
4
|
require 'iconv'
|
4
5
|
require 'open-uri' # cf. http://www.ruby-doc.org/stdlib/libdoc/open-uri/rdoc/index.html
|
5
6
|
|
6
7
|
# taken from: http://www.w3.org/International/questions/qa-forms-utf-8
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
8
|
+
# UTF8REGEX = /\A(?: # ?: non-capturing group (grouping with no back references)
|
9
|
+
# [\x09\x0A\x0D\x20-\x7E] # ASCII
|
10
|
+
# | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
11
|
+
# | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
12
|
+
# | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
|
13
|
+
# | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
14
|
+
# | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
15
|
+
# | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
16
|
+
# | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
17
|
+
# )*\z/mxn
|
17
18
|
|
18
19
|
|
19
20
|
# create UTF-8 character arrays (as class instance variables)
|
@@ -28,17 +29,20 @@ class String
|
|
28
29
|
|
29
30
|
|
30
31
|
# test data
|
31
|
-
@small_letters_utf8 = ["U+00F1", "U+00F4", "U+00E6", "U+00F8", "U+00E0", "U+00E1", "U+00E2", "U+00E4", "U+00E5", "U+00E7", "U+00E8", "U+00E9", "U+00EA", "U+00EB", "U+0153"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
|
32
|
+
# @small_letters_utf8 = ["U+00F1", "U+00F4", "U+00E6", "U+00F8", "U+00E0", "U+00E1", "U+00E2", "U+00E4", "U+00E5", "U+00E7", "U+00E8", "U+00E9", "U+00EA", "U+00EB", "U+0153"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
|
33
|
+
@small_letters_utf8 = ["U+00F1", "U+00F4", "U+00E6", "U+00F8", "U+00E0", "U+00E1", "U+00E2", "U+00E4", "U+00E5", "U+00E7", "U+00E8", "U+00E9", "U+00EA", "U+00EB", "U+0153"].map { |x| u = [x[2..-1].hex].pack("U*"); u.valid_encoding? ? u : nil}
|
32
34
|
|
33
35
|
|
34
|
-
@capital_letters_utf8 = ["U+00D1", "U+00D4", "U+00C6", "U+00D8", "U+00C0", "U+00C1", "U+00C2", "U+00C4", "U+00C5", "U+00C7", "U+00C8", "U+00C9", "U+00CA", "U+00CB", "U+0152"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
|
36
|
+
# @capital_letters_utf8 = ["U+00D1", "U+00D4", "U+00C6", "U+00D8", "U+00C0", "U+00C1", "U+00C2", "U+00C4", "U+00C5", "U+00C7", "U+00C8", "U+00C9", "U+00CA", "U+00CB", "U+0152"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
|
37
|
+
@capital_letters_utf8 = ["U+00D1", "U+00D4", "U+00C6", "U+00D8", "U+00C0", "U+00C1", "U+00C2", "U+00C4", "U+00C5", "U+00C7", "U+00C8", "U+00C9", "U+00CA", "U+00CB", "U+0152"].map { |x| u = [x[2..-1].hex].pack("U*"); u.valid_encoding? ? u : nil }
|
35
38
|
|
36
39
|
|
37
|
-
@other_letters_utf8 = ["U+03A3", "U+0639", "U+0041", "U+F8D0", "U+F8FF", "U+4E2D", "U+F4EE", "U+00FE", "U+10FFFF", "U+00A9", "U+20AC", "U+221E", "U+20AC", "U+FEFF", "U+FFFD", "U+00FF", "U+00FE", "U+FFFE", "U+FEFF"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
|
40
|
+
# @other_letters_utf8 = ["U+03A3", "U+0639", "U+0041", "U+F8D0", "U+F8FF", "U+4E2D", "U+F4EE", "U+00FE", "U+10FFFF", "U+00A9", "U+20AC", "U+221E", "U+20AC", "U+FEFF", "U+FFFD", "U+00FF", "U+00FE", "U+FFFE", "U+FEFF"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
|
41
|
+
@other_letters_utf8 = ["U+03A3", "U+0639", "U+0041", "U+F8D0", "U+F8FF", "U+4E2D", "U+F4EE", "U+00FE", "U+10FFFF", "U+00A9", "U+20AC", "U+221E", "U+20AC", "U+FEFF", "U+FFFD", "U+00FF", "U+00FE", "U+FFFE", "U+FEFF"].map { |x| u = [x[2..-1].hex].pack("U*"); u.valid_encoding? ? u : nil }
|
38
42
|
|
39
|
-
if @small_letters_utf8.size != @small_letters_utf8.
|
40
|
-
if @capital_letters_utf8.size != @capital_letters_utf8.
|
41
|
-
if @other_letters_utf8.size != @other_letters_utf8.
|
43
|
+
if @small_letters_utf8.size != @small_letters_utf8.count{|x| !x.nil?} then raise "Invalid UTF-8 char in @small_letters_utf8!" end
|
44
|
+
if @capital_letters_utf8.size != @capital_letters_utf8.count{|x| !x.nil?} then raise "Invalid UTF-8 char in @capital_letters_utf8!" end
|
45
|
+
if @other_letters_utf8.size != @other_letters_utf8.count{|x| !x.nil?} then raise "Invalid UTF-8 char in @other_letters_utf8!" end
|
42
46
|
|
43
47
|
|
44
48
|
@unicode_array = []
|
@@ -286,7 +290,7 @@ class String
|
|
286
290
|
|
287
291
|
|
288
292
|
# note that the i option does not work in special cases with back references
|
289
|
-
# example: "��".slice_utf8(/(.).*?\1/i) returns nil whereas "aA".slice(/(.).*?\1/i) returns "aA"
|
293
|
+
# example: "äÄ".slice_utf8(/(.).*?\1/i) returns nil whereas "aA".slice(/(.).*?\1/i) returns "aA"
|
290
294
|
def slice_utf8(regex)
|
291
295
|
opts = regex.inspect.gsub(/\A(.).*\1([eimnosux]*)\z/mu, '\2')
|
292
296
|
if opts.count('u') == 0 then opts = opts + "u" end
|
@@ -398,12 +402,14 @@ class String
|
|
398
402
|
end
|
399
403
|
|
400
404
|
def utf8?
|
401
|
-
|
405
|
+
# self =~ UTF8REGEX
|
406
|
+
encoding == Encoding.find("UTF-8") && valid_encoding?
|
402
407
|
end
|
403
408
|
|
404
|
-
def
|
409
|
+
def a
|
405
410
|
t = ""
|
406
|
-
self.scan(/./um) { |c| t << c if c =~ UTF8REGEX }
|
411
|
+
# self.scan(/./um) { |c| t << c if c =~ UTF8REGEX }
|
412
|
+
chars.each { |c| t << c if c.utf8? }
|
407
413
|
t
|
408
414
|
end
|
409
415
|
|
@@ -722,5 +728,3 @@ class String
|
|
722
728
|
end
|
723
729
|
|
724
730
|
end
|
725
|
-
|
726
|
-
|