prawn-arabic 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/string_utf_support.rb +26 -22
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 962e5bd8963ad4181a3e2de6d56ccee373b2986c
4
- data.tar.gz: 9be4f8f12fbfd1a4938a6c17eeb963faa115ed78
3
+ metadata.gz: b44f2721e1423ee982bac9aecfe6deb099a0b4b0
4
+ data.tar.gz: 34c3c9e314e25ff1045e3a375bf82f95e2e5273f
5
5
  SHA512:
6
- metadata.gz: 670db311ad30997c0458f78fa10d6b22018e5347c18255b5c07d99e7b774cc4727f91a7d6c398ae9ab77c69a72f74532605c1a6defe70aeeef19e679dc82976f
7
- data.tar.gz: 85cfba1b766936a56f11e3be1799276a42d9638ba8133f3b06189c55d36e1a0e9722266f4d3dc3562088511ca04979bfb3a1f83f7da713baf9e697a79a745a9a
6
+ metadata.gz: 6c60b592613bcb6f287c05f035865c79b00277731982cb4a6377358c017d1e4a51cb3cd45d124cbe1ebca36cd81ca2065ea86c92f27a408edffc70d330872462
7
+ data.tar.gz: a2ea78a47f545d3e636a10effa27c2c8d34a16c2ae1d1212113c43fb8acea8663fc98e2c7071384800505b7c553645878936e15a29bc2974eebbed586919aea1
@@ -1,19 +1,20 @@
1
+ # encoding: ascii-8bit
1
2
  class String
2
3
 
3
4
  require 'iconv'
4
5
  require 'open-uri' # cf. http://www.ruby-doc.org/stdlib/libdoc/open-uri/rdoc/index.html
5
6
 
6
7
  # taken from: http://www.w3.org/International/questions/qa-forms-utf-8
7
- UTF8REGEX = /\A(?: # ?: non-capturing group (grouping with no back references)
8
- [\x09\x0A\x0D\x20-\x7E] # ASCII
9
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
10
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
11
- | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
12
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
13
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
14
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
15
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
16
- )*\z/mnx
8
+ # UTF8REGEX = /\A(?: # ?: non-capturing group (grouping with no back references)
9
+ # [\x09\x0A\x0D\x20-\x7E] # ASCII
10
+ # | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
11
+ # | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
12
+ # | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
13
+ # | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
14
+ # | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
15
+ # | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
16
+ # | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
17
+ # )*\z/mxn
17
18
 
18
19
 
19
20
  # create UTF-8 character arrays (as class instance variables)
@@ -28,17 +29,20 @@ class String
28
29
 
29
30
 
30
31
  # test data
31
- @small_letters_utf8 = ["U+00F1", "U+00F4", "U+00E6", "U+00F8", "U+00E0", "U+00E1", "U+00E2", "U+00E4", "U+00E5", "U+00E7", "U+00E8", "U+00E9", "U+00EA", "U+00EB", "U+0153"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
32
+ # @small_letters_utf8 = ["U+00F1", "U+00F4", "U+00E6", "U+00F8", "U+00E0", "U+00E1", "U+00E2", "U+00E4", "U+00E5", "U+00E7", "U+00E8", "U+00E9", "U+00EA", "U+00EB", "U+0153"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
33
+ @small_letters_utf8 = ["U+00F1", "U+00F4", "U+00E6", "U+00F8", "U+00E0", "U+00E1", "U+00E2", "U+00E4", "U+00E5", "U+00E7", "U+00E8", "U+00E9", "U+00EA", "U+00EB", "U+0153"].map { |x| u = [x[2..-1].hex].pack("U*"); u.valid_encoding? ? u : nil}
32
34
 
33
35
 
34
- @capital_letters_utf8 = ["U+00D1", "U+00D4", "U+00C6", "U+00D8", "U+00C0", "U+00C1", "U+00C2", "U+00C4", "U+00C5", "U+00C7", "U+00C8", "U+00C9", "U+00CA", "U+00CB", "U+0152"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
36
+ # @capital_letters_utf8 = ["U+00D1", "U+00D4", "U+00C6", "U+00D8", "U+00C0", "U+00C1", "U+00C2", "U+00C4", "U+00C5", "U+00C7", "U+00C8", "U+00C9", "U+00CA", "U+00CB", "U+0152"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
37
+ @capital_letters_utf8 = ["U+00D1", "U+00D4", "U+00C6", "U+00D8", "U+00C0", "U+00C1", "U+00C2", "U+00C4", "U+00C5", "U+00C7", "U+00C8", "U+00C9", "U+00CA", "U+00CB", "U+0152"].map { |x| u = [x[2..-1].hex].pack("U*"); u.valid_encoding? ? u : nil }
35
38
 
36
39
 
37
- @other_letters_utf8 = ["U+03A3", "U+0639", "U+0041", "U+F8D0", "U+F8FF", "U+4E2D", "U+F4EE", "U+00FE", "U+10FFFF", "U+00A9", "U+20AC", "U+221E", "U+20AC", "U+FEFF", "U+FFFD", "U+00FF", "U+00FE", "U+FFFE", "U+FEFF"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
40
+ # @other_letters_utf8 = ["U+03A3", "U+0639", "U+0041", "U+F8D0", "U+F8FF", "U+4E2D", "U+F4EE", "U+00FE", "U+10FFFF", "U+00A9", "U+20AC", "U+221E", "U+20AC", "U+FEFF", "U+FFFD", "U+00FF", "U+00FE", "U+FFFE", "U+FEFF"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
41
+ @other_letters_utf8 = ["U+03A3", "U+0639", "U+0041", "U+F8D0", "U+F8FF", "U+4E2D", "U+F4EE", "U+00FE", "U+10FFFF", "U+00A9", "U+20AC", "U+221E", "U+20AC", "U+FEFF", "U+FFFD", "U+00FF", "U+00FE", "U+FFFE", "U+FEFF"].map { |x| u = [x[2..-1].hex].pack("U*"); u.valid_encoding? ? u : nil }
38
42
 
39
- if @small_letters_utf8.size != @small_letters_utf8.nitems then raise "Invalid UTF-8 char in @small_letters_utf8!" end
40
- if @capital_letters_utf8.size != @capital_letters_utf8.nitems then raise "Invalid UTF-8 char in @capital_letters_utf8!" end
41
- if @other_letters_utf8.size != @other_letters_utf8.nitems then raise "Invalid UTF-8 char in @other_letters_utf8!" end
43
+ if @small_letters_utf8.size != @small_letters_utf8.count{|x| !x.nil?} then raise "Invalid UTF-8 char in @small_letters_utf8!" end
44
+ if @capital_letters_utf8.size != @capital_letters_utf8.count{|x| !x.nil?} then raise "Invalid UTF-8 char in @capital_letters_utf8!" end
45
+ if @other_letters_utf8.size != @other_letters_utf8.count{|x| !x.nil?} then raise "Invalid UTF-8 char in @other_letters_utf8!" end
42
46
 
43
47
 
44
48
  @unicode_array = []
@@ -286,7 +290,7 @@ class String
286
290
 
287
291
 
288
292
  # note that the i option does not work in special cases with back references
289
- # example: "äÄ".slice_utf8(/(.).*?\1/i) returns nil whereas "aA".slice(/(.).*?\1/i) returns "aA"
293
+ # example: "äÄ".slice_utf8(/(.).*?\1/i) returns nil whereas "aA".slice(/(.).*?\1/i) returns "aA"
290
294
  def slice_utf8(regex)
291
295
  opts = regex.inspect.gsub(/\A(.).*\1([eimnosux]*)\z/mu, '\2')
292
296
  if opts.count('u') == 0 then opts = opts + "u" end
@@ -398,12 +402,14 @@ class String
398
402
  end
399
403
 
400
404
  def utf8?
401
- self =~ UTF8REGEX
405
+ # self =~ UTF8REGEX
406
+ encoding == Encoding.find("UTF-8") && valid_encoding?
402
407
  end
403
408
 
404
- def clean_utf8
409
+ def a
405
410
  t = ""
406
- self.scan(/./um) { |c| t << c if c =~ UTF8REGEX }
411
+ # self.scan(/./um) { |c| t << c if c =~ UTF8REGEX }
412
+ chars.each { |c| t << c if c.utf8? }
407
413
  t
408
414
  end
409
415
 
@@ -722,5 +728,3 @@ class String
722
728
  end
723
729
 
724
730
  end
725
-
726
-
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prawn-arabic
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dynamix Solutions