RubyGems - Arabic-Prawn - Versions diffs - 0.0.1 - Mend

Arabic-Prawn 0.0.1

Potentially problematic release.

This version of Arabic-Prawn might be problematic. Click here for more details.

Files changed (6) hide show

data/LICENSE ADDED

@@ -0,0 +1,3 @@
+== Arabic-Prawn
+Put appropriate LICENSE for your project here.

data/README ADDED

@@ -0,0 +1,3 @@
+== Arabic-Prawn
+You should document your project here.

data/Rakefile ADDED

@@ -0,0 +1,50 @@
+#
+# To change this template, choose Tools | Templates
+# and open the template in the editor.
+require 'rubygems'
+require 'rake'
+require 'rake/clean'
+require 'rake/gempackagetask'
+require 'rake/rdoctask'
+require 'rake/testtask'
+require 'spec/rake/spectask'
# Gem metadata; also handed to the packaging task below.
spec = Gem::Specification.new do |gem|
  gem.name = 'Arabic-Prawn'
  gem.version = '0.0.1'
  gem.author = 'Dynamix Solutions'
  gem.email = 'ahmed.nasser@dynamix-systems.com'
  gem.summary = 'Your summary here'
  gem.description = gem.summary # description mirrors the summary
  gem.has_rdoc = true
  gem.extra_rdoc_files = ['README', 'LICENSE']
  # gem.executables = ['your_executable_here']
  gem.files = ['LICENSE', 'README', 'Rakefile'] + Dir["{bin,lib,spec}/**/*"]
  gem.require_path = "lib"
  gem.bindir = "bin"
end

# Packaging task: builds the .gem plus tar and zip archives.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
  pkg.need_zip = true
  pkg.need_tar = true
end

# RDoc task: README is the start page, output goes to doc/rdoc.
Rake::RDocTask.new do |rdoc|
  rdoc.title = "Arabic-Prawn Docs"
  rdoc.main = "README"
  rdoc.rdoc_dir = 'doc/rdoc'
  rdoc.rdoc_files.add(['README', 'LICENSE', 'lib/**/*.rb'])
  rdoc.options << '--line-numbers'
end

# Test::Unit files live under test/.
Rake::TestTask.new { |task| task.test_files = FileList['test/**/*.rb'] }

# RSpec files live under spec/.
Spec::Rake::SpecTask.new { |task| task.spec_files = FileList['spec/**/*.rb'] }

data/lib/arabic-prawn.rb ADDED

@@ -0,0 +1,527 @@
+require 'string_utf_support'
# Integer tags for the four positional forms of a letter. They are used as the
# keys of ArabicCharacterInfo#format_encodings.
class CharacterFormat
  Isolated, Initial, Medial, Final = 1, 2, 3, 4
end
# Shaping data for a single Arabic letter: its common (logical) encoding, the
# encoding of each positional presentation form, and whether the letter
# connects to the letter that follows it.
class ArabicCharacterInfo
  # One row per supported letter, in constructor argument order:
  #   [common, isolated, final, initial, medial, is_connected]
  # Letters that do not connect to the following letter reuse their isolated
  # form as "initial" and their final form as "medial".
  LETTER_FORMS = [
    ["U+0627", "U+fe8d", "U+fe8e", "U+fe8d", "U+fe8e", false], # Alef
    ["U+0628", "U+fe8f", "U+fe90", "U+fe91", "U+fe92", true],  # Beh
    ["U+062a", "U+fe95", "U+fe96", "U+fe97", "U+fe98", true],  # Teh
    ["U+062b", "U+fe99", "U+fe9a", "U+fe9b", "U+fe9c", true],  # Theh
    ["U+062c", "U+fe9d", "U+fe9e", "U+fe9f", "U+fea0", true],  # Jeem
    ["U+062d", "U+fea1", "U+fea2", "U+fea3", "U+fea4", true],  # 7ah (Hah)
    ["U+062e", "U+fea5", "U+fea6", "U+fea7", "U+fea8", true],  # 7'ah (Khah)
    ["U+062f", "U+fea9", "U+feaa", "U+fea9", "U+feaa", false], # Dal
    ["U+0630", "U+feab", "U+feac", "U+feab", "U+feac", false], # Thal
    ["U+0631", "U+fead", "U+feae", "U+fead", "U+feae", false], # Rah
    ["U+0632", "U+feaf", "U+feb0", "U+feaf", "U+feb0", false], # Zein
    ["U+0633", "U+feb1", "U+feb2", "U+feb3", "U+feb4", true],  # Seen
    ["U+0634", "U+feb5", "U+feb6", "U+feb7", "U+feb8", true],  # Sheen
    ["U+0635", "U+feb9", "U+feba", "U+febb", "U+febc", true],  # Sad
    ["U+0636", "U+febd", "U+febe", "U+febf", "U+fec0", true],  # Dad
    ["U+0637", "U+fec1", "U+fec2", "U+fec3", "U+fec4", true],  # Tah
    ["U+0638", "U+fec5", "U+fec6", "U+fec7", "U+fec8", true],  # Thah
    ["U+0639", "U+fec9", "U+feca", "U+fecb", "U+fecc", true],  # 3ein (Ain)
    ["U+063a", "U+fecd", "U+fece", "U+fecf", "U+fed0", true],  # 3'ein (Ghain)
    ["U+0641", "U+fed1", "U+fed2", "U+fed3", "U+fed4", true],  # Feh
    ["U+0642", "U+fed5", "U+fed6", "U+fed7", "U+fed8", true],  # Qaf
    ["U+0643", "U+fed9", "U+feda", "U+fedb", "U+fedc", true],  # Kaf
    ["U+0644", "U+fedd", "U+fede", "U+fedf", "U+fee0", true],  # Lam
    ["U+0645", "U+fee1", "U+fee2", "U+fee3", "U+fee4", true],  # Meem
    ["U+0646", "U+fee5", "U+fee6", "U+fee7", "U+fee8", true],  # Noon
    ["U+0647", "U+fee9", "U+feea", "U+feeb", "U+feec", true],  # Heh
    ["U+0648", "U+feed", "U+feee", "U+feed", "U+feee", false], # Waw
    ["U+064a", "U+fef1", "U+fef2", "U+fef3", "U+fef4", true],  # Yeh
    ["U+0621", "U+fe80", "U+fe80", "U+fe80", "U+fe80", false], # Hamza
    ["U+0622", "U+fe81", "U+fe82", "U+fe81", "U+fe82", false], # Alef Madda
    ["U+0623", "U+fe83", "U+fe84", "U+fe83", "U+fe84", false], # Alef Hamza Above
    ["U+0624", "U+fe85", "U+fe86", "U+fe85", "U+fe86", false], # Waw Hamza
    ["U+0625", "U+fe87", "U+fe88", "U+fe87", "U+fe88", false], # Alef Hamza Below
    ["U+0626", "U+fe89", "U+fe8a", "U+fe8b", "U+fe8c", true],  # Yeh Hamza
    ["U+0629", "U+fe93", "U+fe94", "U+fe93", "U+fe94", false], # Teh Marbuta
    ["U+0640", "U+0640", "U+0640", "U+0640", "U+0640", true],  # Tatweel
    ["U+0649", "U+feef", "U+fef0", "U+feef", "U+fef0", false]  # Alef Layyena
  ].freeze

  # Lazily built lookup table, shared by all callers.
  @@arabic_characters_map = nil

  attr_accessor :common_encoding, :format_encodings, :is_connected

  # All encodings are given in "U+XXXX" notation and converted with
  # String#unicode_to_utf8 (provided by string_utf_support).
  #
  # common       - logical code point of the letter
  # isolated, final, initial, medial - code points of the positional forms
  # is_connected - true when the letter connects to the letter after it
  def initialize(common, isolated, final, initial, medial, is_connected)
    @common_encoding = common.unicode_to_utf8
    @format_encodings = {
      CharacterFormat::Isolated => isolated.unicode_to_utf8,
      CharacterFormat::Initial => initial.unicode_to_utf8,
      CharacterFormat::Medial => medial.unicode_to_utf8,
      CharacterFormat::Final => final.unicode_to_utf8
    }
    @is_connected = is_connected
  end

  # Returns the Hash mapping each letter's common encoding to its
  # ArabicCharacterInfo. The Hash is built from LETTER_FORMS on first use and
  # the same object is returned on every later call.
  def ArabicCharacterInfo.get_arabic_characters_map
    @@arabic_characters_map ||= LETTER_FORMS.inject({}) do |map, row|
      info = ArabicCharacterInfo.new(*row)
      map[info.common_encoding] = info
      map
    end
  end
end
# Arabic glyph shaping helpers added to String. They turn logical-order Arabic
# text into presentation forms laid out in visual order.
class String
  # Picks the positional form (a CharacterFormat constant) for a letter from
  # its neighbours.
  #
  # before_c - the character preceding the letter ('' at the start of a word)
  # after_c  - the character following the letter ('' at the end of a word)
  #
  # The letter joins backwards when before_c is a known letter whose
  # is_connected flag is set. It joins forwards when after_c is a known letter
  # that accepts a connection from the letter before it.
  def determine_format(before_c, after_c)
    charmap = ArabicCharacterInfo.get_arabic_characters_map
    previous = charmap[before_c]
    following = charmap[after_c]
    joins_previous = !previous.nil? && previous.is_connected
    # A following letter that neither connects onwards nor has a distinct
    # final form (Hamza in the current table) does not join to the letter
    # before it, so it must not pull this letter into Initial/Medial form.
    joins_next = !following.nil? &&
                 (following.is_connected ||
                  following.format_encodings[CharacterFormat::Final] !=
                    following.format_encodings[CharacterFormat::Isolated])
    if joins_next
      joins_previous ? CharacterFormat::Medial : CharacterFormat::Initial
    else
      joins_previous ? CharacterFormat::Final : CharacterFormat::Isolated
    end
  end

  # Returns the encoding of character c in the given format, or c itself when
  # c is not in the Arabic character map.
  def get_letter_in_format(format, c)
    character = ArabicCharacterInfo.get_arabic_characters_map[c]
    return c if character.nil?
    character.format_encodings[format]
  end

  # Shapes one word. Every character is replaced by its positional form. When
  # at least one character changed, the word is treated as Arabic and returned
  # reversed (visual order); otherwise it is returned in its original order.
  def fix_word
    letters = scan(/./mu)
    reshaped = false
    glyphs = Array.new(letters.size) do |i|
      before = i > 0 ? letters[i - 1] : ''
      after = letters[i + 1] || ''
      glyph = get_letter_in_format(determine_format(before, after), letters[i])
      reshaped = true if glyph != letters[i]
      glyph
    end
    reshaped ? glyphs.reverse.join : glyphs.join
  end

  # Shapes a whole line, assuming a right-to-left base direction. Arabic words
  # are placed right to left. Runs of consecutive unshaped (left-to-right)
  # words keep their internal order and move as one unit. Segments are joined
  # by exactly one space, with no leading or trailing padding.
  def fix_arabic_glyphs
    segments = [] # visual order, leftmost first
    ltr_run = []  # pending left-to-right words
    split(" ").each do |word|
      fixed_word = word.fix_word
      if fixed_word == word
        # a non-Arabic word: buffer it in case more LTR words follow
        ltr_run << fixed_word
      else
        segments.unshift(ltr_run.join(" ")) unless ltr_run.empty?
        ltr_run = []
        segments.unshift(fixed_word)
      end
    end
    segments.unshift(ltr_run.join(" ")) unless ltr_run.empty?
    segments.join(" ")
  end
end

data/lib/string_utf_support.rb ADDED

@@ -0,0 +1,726 @@
+class String
   require 'iconv'
   require 'open-uri'      # cf. http://www.ruby-doc.org/stdlib/libdoc/open-uri/rdoc/index.html
   require 'shellwords'    # Shellwords.escape for the shell commands in utf8_encoded_file?
+   # taken from: http://www.w3.org/International/questions/qa-forms-utf-8
+   UTF8REGEX = /\A(?:                               # ?: non-capturing group (grouping with no back references)
+                 [\x09\x0A\x0D\x20-\x7E]            # ASCII
+               | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
+               |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
+               | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
+               |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
+               |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
+               | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
+               |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
+               )*\z/mnx
+#  create UTF-8 character arrays (as class instance variables)
+#
+#  mapping tables: - http://www.unicode.org/Public/UCA/latest/allkeys.txt
+#                  - http://unicode.org/Public/UNIDATA/UnicodeData.txt
+#                  - http://unicode.org/Public/UNIDATA/CaseFolding.txt
+#                  - http://www.decodeunicode.org
+#                  - ftp://ftp.mars.org/pub/ruby/Unicode.tar.bz2
+#                  - http://camomile.sourceforge.net
+#                  - Character Palette (Mac OS X)
+   # test data
+   @small_letters_utf8 = ["U+00F1", "U+00F4", "U+00E6", "U+00F8", "U+00E0", "U+00E1", "U+00E2", "U+00E4", "U+00E5", "U+00E7", "U+00E8", "U+00E9", "U+00EA", "U+00EB", "U+0153"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
+   @capital_letters_utf8 = ["U+00D1", "U+00D4", "U+00C6", "U+00D8", "U+00C0", "U+00C1", "U+00C2", "U+00C4", "U+00C5", "U+00C7", "U+00C8", "U+00C9", "U+00CA", "U+00CB", "U+0152"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
+   @other_letters_utf8 = ["U+03A3", "U+0639", "U+0041", "U+F8D0", "U+F8FF", "U+4E2D", "U+F4EE", "U+00FE", "U+10FFFF", "U+00A9", "U+20AC", "U+221E", "U+20AC", "U+FEFF", "U+FFFD", "U+00FF", "U+00FE", "U+FFFE", "U+FEFF"].map { |x| u = [x[2..-1].hex].pack("U*"); u =~ UTF8REGEX ? u : nil }
+   if @small_letters_utf8.size != @small_letters_utf8.nitems then raise "Invalid UTF-8 char in @small_letters_utf8!" end
+   if @capital_letters_utf8.size != @capital_letters_utf8.nitems then raise "Invalid UTF-8 char in @capital_letters_utf8!" end
+   if @other_letters_utf8.size != @other_letters_utf8.nitems then raise "Invalid UTF-8 char in @other_letters_utf8!" end
+   @unicode_array = []
+   #open('http://unicode.org/Public/UNIDATA/UnicodeData.txt') do |f| f.each(nil) { |line| line.scan(/^[^;]+/) { |u| @unicode_array << u } }  end
+   #open('http://unicode.org/Public/UNIDATA/UnicodeData.txt') do |f|
+   #   f.each do |line| line =~ /LATIN|GREEK|CYRILLIC/  ?  ( line.scan(/^[^;]+/) { |u| @unicode_array << u } )  :  next  end
+   #end
+   #@letters_utf8 = @unicode_array.map { |x| u = [x.hex].pack("U*"); u =~ UTF8REGEX ? u : nil }.compact   # code points from UnicodeData.txt
+   @letters_utf8 = @small_letters_utf8 + @capital_letters_utf8 + @other_letters_utf8                      # test data only
+   # Hash[*array_with_keys.zip(array_with_values).flatten]
+   @downcase_table_utf8 = Hash[*@capital_letters_utf8.zip(@small_letters_utf8).flatten]
+   @upcase_table_utf8 = Hash[*@small_letters_utf8.zip(@capital_letters_utf8).flatten]
+   @letters_utf8_hash = Hash[*@letters_utf8.zip([]).flatten]    #=> ... "\341\272\242"=>nil ...
+   class << self
+      attr_accessor :small_letters_utf8
+      attr_accessor :capital_letters_utf8
+      attr_accessor :other_letters_utf8
+      attr_accessor :letters_utf8
+      attr_accessor :letters_utf8_hash
+      attr_accessor :unicode_array
+      attr_accessor :downcase_table_utf8
+      attr_accessor :upcase_table_utf8
+   end
+   def each_utf8_char
+      scan(/./mu) { |c| yield c }
+   end
+   def each_utf8_char_with_index
+      i = -1
+      scan(/./mu) { |c| i+=1; yield(c, i) }
+   end
+   def length_utf8
+      #scan(/./mu).size
+      count = 0
+      scan(/./mu) { count += 1 }
+      count
+   end
+   alias :size_utf8 :length_utf8
+   def reverse_utf8
+      split(//mu).reverse.join
+   end
+   def reverse_utf8!
+      split(//mu).reverse!.join
+   end
+   def swapcase_utf8
+     gsub(/./mu) do |char|
+         if !String.downcase_table_utf8[char].nil? then String.downcase_table_utf8[char]
+         elsif !String.upcase_table_utf8[char].nil? then String.upcase_table_utf8[char]
+         else char.swapcase
+         end
+      end
+   end
+   def swapcase_utf8!
+      gsub!(/./mu) do |char|
+         if !String.downcase_table_utf8[char].nil? then String.downcase_table_utf8[char]
+         elsif !String.upcase_table_utf8[char].nil? then String.upcase_table_utf8[char]
+         else ret = char.swapcase end
+      end
+   end
+   def downcase_utf8
+      gsub(/./mu) do |char|
+         small_char = String.downcase_table_utf8[char]
+         small_char.nil? ? char.downcase : small_char
+      end
+   end
+   def downcase_utf8!
+      gsub!(/./mu) do |char|
+         small_char = String.downcase_table_utf8[char]
+         small_char.nil? ? char.downcase : small_char
+      end
+   end
+   def upcase_utf8
+      gsub(/./mu) do |char|
+         capital_char = String.upcase_table_utf8[char]
+         capital_char.nil? ? char.upcase : capital_char
+      end
+   end
+   def upcase_utf8!
+      gsub!(/./mu) do |char|
+         capital_char = String.upcase_table_utf8[char]
+         capital_char.nil? ? char.upcase : capital_char
+      end
+   end
+   def count_utf8(c)
+      return nil if c.empty?
+      r = %r{[#{c}]}mu
+      scan(r).size
+   end
+   def delete_utf8(c)
+      return self if c.empty?
+      r = %r{[#{c}]}mu
+      gsub(r, '')
+   end
+   def delete_utf8!(c)
+      return self if c.empty?
+      r = %r{[#{c}]}mu
+      gsub!(r, '')
+   end
+   def first_utf8
+      self[/\A./mu]
+   end
+   def last_utf8
+      self[/.\z/mu]
+   end
+   def capitalize_utf8
+     return self if self =~ /\A[[:space:]]*\z/m
+     ret = ""
+     split(/\x20/).each do |w|
+         count = 0
+         w.gsub(/./mu) do |char|
+            count += 1
+            capital_char = String.upcase_table_utf8[char]
+            if count == 1 then
+               capital_char.nil? ? char.upcase : char.upcase_utf8
+            else
+               capital_char.nil? ? char.downcase : char.downcase_utf8
+            end
+         end
+         ret << w + ' '
+     end
+     ret =~ /\x20\z/ ? ret.sub!(/\x20\z/, '') : ret
+   end
+   def capitalize_utf8!
+     return self if self =~ /\A[[:space:]]*\z/m
+     ret = ""
+     split(/\x20/).each do |w|
+         count = 0
+         w.gsub!(/./mu) do |char|
+            count += 1
+            capital_char = String.upcase_table_utf8[char]
+            if count == 1 then
+               capital_char.nil? ? char.upcase : char.upcase_utf8
+            else
+               capital_char.nil? ? char.downcase : char.downcase_utf8
+            end
+         end
+         ret << w + ' '
+     end
+     ret =~ /\x20\z/ ? ret.sub!(/\x20\z/, '') : ret
+   end
+   def index_utf8(s)
+      return nil unless !self.empty? && (s.class == Regexp || s.class == String)
+      #raise(ArgumentError, "Wrong argument for method index_utf8!", caller) unless !self.empty? && (s.class == Regexp || s.class == String)
+      if s.class == Regexp
+         opts = s.inspect.gsub(/\A(.).*\1([eimnosux]*)\z/mu, '\2')
+         if  opts.count('u') == 0 then opts = opts + "u" end
+         str = s.source
+         return nil if str.empty?
+         str = "%r{#{str}}" + opts
+         r = eval(str)
+         l = ""
+         sub(r) { l << $`; " " }  # $`: The string to the left of the last successful match (cf. http://www.zenspider.com/Languages/Ruby/QuickRef.html)
+         l.empty? ? nil : l.length_utf8
+      else
+         return nil if s.empty?
+         r = %r{#{s}}mu
+         l = ""
+         sub(r) { l << $`; " " }
+         l.empty? ? nil : l.length_utf8
+# this would be a non-regex solution
+=begin
+         return nil if s.empty?
+         return nil unless self =~ %r{#{s}}mu
+         indices = []
+         s.split(//mu).each do |x|
+            ar = []
+            self.each_utf8_char_with_index { |c,i| if c == x then ar << i end  }   # first get all matching indices c == x
+            indices << ar unless ar.empty?
+         end
+         if indices.empty?
+            return nil
+         elsif indices.size == 1
+            indices.first.first
+         else
+            #p indices
+            ret = []
+            a0 = indices.shift
+            a0.each do |i|
+               ret << i
+               indices.each { |a| if a.include?(i+1) then i += 1; ret << i else ret = []; break end  }
+               return ret.first unless ret.empty?
+            end
+            ret.empty? ? nil : ret.first
+         end
+=end
+      end
+   end
+   def rindex_utf8(s)
+      return nil unless !self.empty? && (s.class == Regexp || s.class == String)
+      #raise(ArgumentError, "Wrong argument for method index_utf8!", caller) unless !self.empty? && (s.class == Regexp || s.class == String)
+      if s.class == Regexp
+         opts = s.inspect.gsub(/\A(.).*\1([eimnosux]*)\z/mu, '\2')
+         if  opts.count('u') == 0 then opts = opts + "u" end
+         str = s.source
+         return nil if str.empty?
+         str = "%r{#{str}}" + opts
+         r = eval(str)
+         l = ""
+         scan(r) { l = $` }
+         #gsub(r) { l = $`; " " }
+         l.empty? ? nil : l.length_utf8
+      else
+         return nil if s.empty?
+         r = %r{#{s}}mu
+         l = ""
+         scan(r) { l = $` }
+         #gsub(r) { l = $`; " " }
+         l.empty? ? nil : l.length_utf8
+      end
+   end
+   # note that the i option does not work in special cases with back references
+   # example: "��".slice_utf8(/(.).*?\1/i) returns nil whereas "aA".slice(/(.).*?\1/i) returns "aA"
+   def slice_utf8(regex)
+      opts = regex.inspect.gsub(/\A(.).*\1([eimnosux]*)\z/mu, '\2')
+      if  opts.count('u') == 0 then opts = opts + "u" end
+      s = regex.source
+      str = "%r{#{s}}" + opts
+      r = eval(str)
+      slice(r)
+   end
+   def slice_utf8!(regex)
+      opts = regex.inspect.gsub(/\A(.).*\1([eimnosux]*)\z/mu, '\2')
+      if  opts.count('u') == 0 then opts = opts + "u" end
+      s = regex.source
+      str = "%r{#{s}}" + opts
+      r = eval(str)
+      slice!(r)
+   end
+   def cut_utf8(p,l)    # (index) position, length
+      raise(ArgumentError, "Error: argument is not Fixnum", caller) if p.class != Fixnum or l.class != Fixnum
+      s = self.length_utf8
+      #if p < 0 then p = s - p.abs end
+      if p < 0 then p.abs > s ? (p = 0) : (p = s - p.abs) end      #  or:  ... p.abs > s ? (return nil) : ...
+      return nil if l > s or p > (s - 1)
+      ret = ""
+      count = 0
+      each_utf8_char_with_index do |c,i|
+         break if count >= l
+         if i >= p && count < l then count += 1; ret << c; end
+      end
+      ret
+   end
+   def starts_with_utf8?(s)
+      return nil if self.empty? or s.empty?
+      cut_utf8(0, s.size_utf8) == s
+   end
+   def ends_with_utf8?(s)
+      return nil if self.empty? or s.empty?
+      cut_utf8(-(s.size_utf8), s.size_utf8) == s
+   end
+   def insert_utf8(i,s)                                  # insert_utf8(index, string)
+      return self if s.empty?
+      l = self.length_utf8
+      if l == 0 then return s end
+      if i < 0 then i.abs > l ? (i = 0) : (i = l - i.abs) end          #  or:  ... i.abs > l ? (return nil) : ...
+      #return nil if i > (l - 1)                         # return nil ...
+      spaces = ""
+      if i > (l-1) then spaces = " " * (i - (l-1)) end   # ... or add spaces
+      str = self << spaces
+      s1 = str.cut_utf8(0, i)
+      s2 = str.cut_utf8(i, l - s1.length_utf8)
+      s1 << s << s2
+   end
+   def split_utf8(regex)
+      opts = regex.inspect.gsub(/\A(.).*\1([eimnosux]*)\z/mu, '\2')
+      if  opts.count('u') == 0 then opts = opts + "u" end
+      s = regex.source
+      str = "%r{#{s}}" + opts
+      r = eval(str)
+      split(r)
+   end
+   def scan_utf8(regex)
+      opts = regex.inspect.gsub(/\A(.).*\1([eimnosux]*)\z/mu, '\2')
+      if  opts.count('u') == 0 then opts = opts + "u" end
+      s = regex.source
+      str = "%r{#{s}}" + opts
+      r = eval(str)
+      if block_given? then scan(r) { |a,*m| yield(a,*m) } else scan(r) end
+   end
+   def range_utf8(r)
+      return nil if r.class != Range
+      #raise(ArgumentError, "No Range object given!", caller) if r.class != Range
+      a = r.to_s[/^[\+\-]?\d+/].to_i
+      b = r.to_s[/[\+\-]?\d+$/].to_i
+      d = r.to_s[/\.+/]
+      if d.size == 2 then d = 2 else d = d.size end
+      l = self.length_utf8
+      return nil if b.abs > l || a.abs > l || d < 2 || d > 3
+      if a < 0 then a = l - a.abs end
+      if b < 0 then b = l - b.abs end
+      return nil if a > b
+      str = ""
+      each_utf8_char_with_index do |c,i|
+         break if i > b
+         if d == 2
+            (i >= a && i <= b) ? str << c : next
+         else
+            (i >= a && i < b) ? str << c : next
+         end
+      end
+      str
+   end
+   def utf8?
+     self =~ UTF8REGEX
+   end
+   def clean_utf8
+       t = ""
+       self.scan(/./um) { |c| t << c if c =~ UTF8REGEX }
+       t
+   end
+   def utf8_encoded_file?   # check (or rather guess) if (HTML) file encoding is UTF-8 (experimental, so use at your own risk!)
+      file = self
+      str = ""
+      if file =~ /^http:\/\//
+         url = file
+         if RUBY_PLATFORM =~ /darwin/i   # Mac OS X 10.4.10
+            seconds = 30
+            # check if web site is reachable
+            # on Windows try to use curb, http://curb.rubyforge.org (sudo gem install curb)
+            var = %x{ /usr/bin/curl -I -L --fail --silent --connect-timeout #{seconds} --max-time #{seconds+10} #{url}; /bin/echo -n $? }.to_i
+            #return false unless var == 0
+            raise "Failed to create connection to web site: #{url}  --  curl error code: #{var}  --  " unless var == 0
+            str = %x{ /usr/bin/curl -L --fail --silent --connect-timeout #{seconds} --max-time #{seconds+10} #{url} | \
+                      /usr/bin/grep -Eo -m 1 \"(charset|encoding)=[\\"']?[^\\"'>]+\" | /usr/bin/grep -Eo \"[^=\\"'>]+$\" }
+            p str
+            return true if str =~ /utf-?8/i
+            return false if !str.empty? && str !~ /utf-?8/i
+            # solutions with downloaded file
+            # download HTML file
+            #downloaded_file = "/tmp/html"
+            downloaded_file = "~/Desktop/html"
+            downloaded_file = File.expand_path(downloaded_file)
+            %x{ /usr/bin/touch #{downloaded_file} 2>/dev/null }
+            raise "No valid HTML download file (path) specified!" unless File.file?(downloaded_file)
+            %x{ /usr/bin/curl -L --fail --silent --connect-timeout #{seconds} --max-time #{seconds+10} -o #{downloaded_file} #{url} }
+            simple_test = %x{ /usr/bin/file -ik #{downloaded_file} }    #  cf. man file
+            p simple_test
+            # read entire file into a string
+            File.open(downloaded_file).read.each(nil) do |str|
+               #return true if str =~ /(charset|encoding) *= *["']? *utf-?8/i
+               str.utf8? ? (return true) : (return false)
+            end
+            #check each line of the downloaded file
+            #count_lines = 0
+            #count_utf8 = 0
+            #File.foreach(downloaded_file) { |line| return true if line =~ /(charset|encoding) *= *["']? *utf-?8/i; count_lines += 1;  count_utf8 += 1 if line.clean_utf8.utf8?; break if count_lines != count_utf8 }
+            #count_lines == count_utf8 ? (return true) : (return false)
+            # in-memory solutions
+            #html_file_cleaned_utf8 = %x{ /usr/bin/curl -L --fail --silent --connect-timeout #{seconds} --max-time #{seconds+10} #{url} }.clean_utf8
+            #p html_file_cleaned_utf8.utf8?
+            count_lines = 0
+            count_utf8 = 0
+            #%x{ /usr/bin/curl -L --fail --silent --connect-timeout #{seconds} --max-time #{seconds+10} #{url} }.each(nil) do |line|    # read entire file into string
+            %x{ /usr/bin/curl -L --fail --silent --connect-timeout #{seconds} --max-time #{seconds+10} #{url} }.each('\n') do |line|
+               #return true if line =~ /(charset|encoding) *= *["']? *utf-?8/i
+               count_lines += 1
+               count_utf8 += 1 if line.utf8?
+               break if count_lines != count_utf8
+            end
+            count_lines == count_utf8 ? (return true) : (return false)
+         else
+            # check each line of the HTML file (or the entire HTML file at once)
+            # cf. http://www.ruby-doc.org/stdlib/libdoc/open-uri/rdoc/index.html
+            count_lines = 0
+            count_utf8 = 0
+            open(url) do |f|
+               # p f.meta, f.content_encoding, f.content_type
+               cs = f.charset
+               return true if cs =~ /utf-?8/i
+               #f.each(nil) do |str| str.utf8? ? (return true) : (return false) end  # read entire file into string
+               f.each_line do |line|
+                  count_lines += 1
+                  count_utf8 += 1 if line.utf8?
+                  break unless count_lines == count_utf8
+               end
+            end
+            count_lines == count_utf8 ? (return true) : (return false)
+         end
+      else
+         return false unless File.file?(file)
+         if RUBY_PLATFORM =~ /darwin/i then str = %x{ /usr/bin/file -ik #{file} }; return true if str =~ /utf-?8/i end
+         # read entire file into a string
+         #File.open(file).read.each(nil) do |str| return true if str =~ /(charset|encoding) *= *["']? *utf-?8/i; str.utf8? ? (return true) : (return false) end
+         # check each line of the file
+         count_lines = 0
+         count_utf8 = 0
+         File.foreach(file) do |line|
+            return true if line =~ /(charset|encoding) *= *["']? *utf-?8/i
+            count_lines += 1;
+            count_utf8 += 1 if line.utf8?;
+            break if count_lines != count_utf8
+         end
+         count_lines == count_utf8 ? (return true) : (return false)
+      end
+      str =~ /utf-?8/i ? true : false
+   end
+   # cf. Paul Battley, http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/
+   def validate_utf8
+      Iconv.iconv('UTF-8//IGNORE', 'UTF-8', (self + ' ') ).first[0..-2]
+   end
+   # cf. Paul Battley, http://www.ruby-forum.com/topic/70357
+   def asciify_utf8
+       return nil unless self.utf8?
+       #Iconv.iconv('US-ASCII//IGNORE//TRANSLIT', 'UTF-8', (self + ' ') ).first[0..-2]
+       # delete all punctuation characters inside words except "-" in words such as up-to-date
+       Iconv.iconv('US-ASCII//IGNORE//TRANSLIT', 'UTF-8', (self + ' ') ).first[0..-2].gsub(/(?!-.*)\b[[:punct:]]+\b/, '')
+   end
+   def latin1_to_utf8     # ISO-8859-1 to UTF-8
+      ret = Iconv.iconv("UTF-8//IGNORE", "ISO-8859-1", (self + "\x20") ).first[0..-2]
+      ret.utf8? ? ret : nil
+   end
+   def cp1252_to_utf8     # CP1252 (WINDOWS-1252) to UTF-8
+      ret = Iconv.iconv("UTF-8//IGNORE", "CP1252", (self + "\x20") ).first[0..-2]
+      ret.utf8? ? ret : nil
+   end
+   # cf. Paul Battley, http://www.ruby-forum.com/topic/70357
+   def utf16le_to_utf8
+       ret = Iconv.iconv('UTF-8//IGNORE', 'UTF-16LE', (self[0,(self.length/2*2)] + "\000\000") ).first[0..-2]
+       ret =~ /\x00\z/ ?  ret.sub!(/\x00\z/, '') : ret
+       ret.utf8? ? ret : nil
+   end
+   def utf8_to_utf16le
+      return nil unless self.utf8?
+      ret = Iconv.iconv('UTF-16LE//IGNORE', 'UTF-8', self ).first
+   end
+   def utf8_to_unicode
+      return nil unless self.utf8?
+      str = ""
+      scan(/./mu) { |c| str << "U+" << sprintf("%04X", c.unpack("U*").first) }
+      str
+   end
+   def unicode_to_utf8
+      return self if self =~ /\A[[:space:]]*\z/m
+      str = ""
+      #scan(/U\+([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})/) { |u| str << [u.first.hex].pack("U*") }
+      #scan(/U\+([[:digit:][:xdigit:]]{4,5}|10[[:digit:][:xdigit:]]{4})/) { |u| str << [u.first.hex].pack("U*") }
+      scan(/(U\+(?:[[:digit:][:xdigit:]]{4,5}|10[[:digit:][:xdigit:]]{4})|.)/mu) do        # for mixed strings such as "U+00bfHabla espaU+00f1ol?"
+         c = $1
+         if c =~ /^U\+/
+            str << [c[2..-1].hex].pack("U*")
+         else
+            str << c
+         end
+      end
+      str.utf8? ? str : nil
+   end
+   # dec, hex, oct conversions (experimental!)
+   def utf8_to_dec
+      return nil unless self.utf8?
+      str = ""
+      scan(/./mu) do |c|
+         if c =~ /^\x00$/
+            str << "aaa\x00"  # encode \x00 as "aaa"
+         else
+            str << sprintf("%04X", c.unpack("U*").first).hex.to_s << "\x00"   # convert to decimal
+         end
+      end
+      str[0..-2]
+   end
+   def dec_to_utf8   # \x00 is encoded as "aaa"
+      return self if self.empty?
+      return nil unless self =~ /\A[[:digit:]]+\x00/ && self =~ /\A[a[:digit:]\x00]+\z/
+      str = ""
+      split(/\x00/).each do |c|
+         if c.eql?("aaa")
+            str << "\x00"
+         else
+            str << [c.to_i].pack("U*")
+         end
+      end
+      str
+   end
+   def utf8_to_dec_2
+      return nil unless self.utf8?
+      str = ""
+      tmpstr = ""
+      null_str = "\x00"
+      scan(/./mu) do |c|
+         if c =~ /^\x00$/
+            str << "aaa\x00\x00"  # encode \x00 as "aaa"
+         else
+            tmpstr = ""
+            c.each_byte { |x| tmpstr << x.to_s << null_str }      # convert to decimal
+            str << tmpstr << null_str
+         end
+      end
+      str[0..-3]
+   end
+   def dec_to_utf8_2   # \x00 is encoded as "aaa"
+      return self if self.empty?
+      return nil unless self =~ /\A[[:digit:]]+\x00/ && self =~ /[[:digit:]]+\x00\x00/ && self =~ /\A[a[:digit:]\x00]+\z/
+      str = ""
+      split(/\x00\x00/).each do |c|
+         if c =~ /\x00/
+            c.split(/\x00/).each { |x| str << x.to_i.chr }
+         elsif c.eql?("aaa")
+            str << "\x00"
+         else
+            str << c.to_i.chr
+         end
+      end
+      str
+   end
+   def utf8_to_hex
+      return nil unless self.utf8?
+      str = ""
+      tmpstr = ""
+      null_str = "\x00"
+      scan(/./mu) do |c|
+         if c =~ /^\x00$/
+            str << "aaa\x00\x00"    # encode \x00 as "aaa"
+         else
+            tmpstr = ""
+            c.each_byte { |x| tmpstr << sprintf("%X", x) << null_str }      # convert to hexadecimal
+            str << tmpstr << null_str
+         end
+      end
+      str[0..-3]
+   end
+   def hex_to_utf8   # \x00 is encoded as "aaa"
+      return self if self.empty?
+      return nil unless self =~ /\A[[:xdigit:]]+\x00/ && self =~ /[[:xdigit:]]+\x00\x00/ && self =~ /\A[a[:xdigit:]\x00]+\z/
+      str = ""
+      split(/\x00\x00/).each do |c|
+         if c =~ /\x00/
+            c.split(/\x00/).each { |x| str << x.hex.chr }
+         elsif c.eql?("aaa")
+            str << "\x00"
+         else
+            str << c.hex.chr
+         end
+      end
+      str
+   end
+   def utf8_to_oct
+      return nil unless self.utf8?
+      str = ""
+      tmpstr = ""
+      null_str = "\x00"
+      scan(/./mu) do |c|
+         if c =~ /^\x00$/
+            str << "aaa\x00\x00"   # encode \x00 as "aaa"
+         else
+            tmpstr = ""
+            c.each_byte { |x| tmpstr << sprintf("%o", x) << null_str }      # convert to octal
+            str << tmpstr << null_str
+         end
+      end
+      str[0..-3]
+   end
+   def oct_to_utf8   # \x00 is encoded as "aaa"
+      return self if self.empty?
+      return nil unless self =~ /\A[[:digit:]]+\x00/ && self =~ /[[:digit:]]+\x00\x00/ && self =~ /\A[a[:digit:]\x00]+\z/
+      str = ""
+      split(/\x00\x00/).each do |c|
+         if c =~ /\x00/
+            c.split(/\x00/).each { |x| str << x.oct.chr }
+         elsif c.eql?("aaa")
+            str << "\x00"
+         else
+            str << c.oct.chr
+         end
+      end
+      str
+   end
+   # cf. http://node-0.mneisen.org/2007/03/13/email-subjects-in-utf-8-mit-ruby-kodieren/
+   def email_subject_utf8
+      return nil unless self.utf8?
+      "=?utf-8?b?#{[self].pack("m").delete("\n")}?="
+   end
+end

metadata ADDED

@@ -0,0 +1,67 @@
+--- !ruby/object:Gem::Specification
+name: Arabic-Prawn
+version: !ruby/object:Gem::Version
+  prerelease: false
+  segments:
+    - 0
+    - 0
+    - 1
+  version: 0.0.1
+platform: ruby
+authors:
+  - Dynamix Solutions
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2010-02-28 00:00:00 +02:00
+default_executable:
+dependencies: []
+description: Allows printing arabic to PDFs generated by prawn
+email: ahmed.nasser@dynamix-systems.com
+executables: []
+extensions: []
+extra_rdoc_files:
+  - README
+  - LICENSE
+files:
+  - LICENSE
+  - README
+  - Rakefile
+  - lib/arabic-prawn.rb
+  - lib/string_utf_support.rb
+has_rdoc: true
+homepage:
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+  - lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        segments:
+          - 0
+        version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        segments:
+          - 0
+        version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.3.6
+signing_key:
+specification_version: 3
+summary: Allows printing arabic to PDFs generated by prawn
+test_files: []