RubyGems - mosaheh - Versions diffs - 0.0.1 → 0.0.2 - Mend

mosaheh 0.0.1 → 0.0.2

Files changed (13) hide show

data/Guardfile +5 -0
data/lib/mosaheh.rb +2 -0
data/lib/mosaheh/core_ext.rb +3 -0
data/lib/mosaheh/core_ext/string.rb +17 -0
data/lib/mosaheh/encoder.rb +62 -97
data/lib/mosaheh/version.rb +1 -1
data/mosaheh.gemspec +6 -1
data/spec/lib/mosaheh/core_ext/string_spec.rb +15 -0
data/spec/lib/mosaheh/encoder_spec.rb +25 -0
data/spec/misencoded_samples/ar.txt +1 -0
data/spec/spec_helper.rb +8 -0
metadata +54 -5
data/spec/mosaheh/encoder_spec.rb +0 -17

data/Guardfile ADDED

@@ -0,0 +1,5 @@
+guard 'rspec', :cli => '--color --format nested' do
+  watch(%r{^spec/.+_spec\.rb$})
+  watch(%r{^lib/(.+)\.rb$})     { |m| "spec/lib/#{m[1]}_spec.rb" }
+  watch('spec/spec_helper.rb')  { "spec" }
+end

data/lib/mosaheh.rb CHANGED

@@ -1,6 +1,8 @@
 # encoding: UTF-8
 require "mosaheh/version"
+require 'mosaheh/core_ext'
 module Mosaheh
   autoload :Encoder, 'mosaheh/encoder'
 end

data/lib/mosaheh/core_ext.rb ADDED

@@ -0,0 +1,3 @@
+Dir["#{File.dirname(__FILE__)}/core_ext/*.rb"].each do |path|
+  require "mosaheh/core_ext/#{File.basename(path, '.rb')}"
+end

data/lib/mosaheh/core_ext/string.rb ADDED

@@ -0,0 +1,17 @@
+# encoding: utf-8
+class String
+  # Removes the first character from the String
+  # and returns it back
+  #
+  # @example
+  #   str = 'abc'
+  #   str.shift! # => 'a'
+  #   p str      # => 'bc'
+  # @return [String] The removed first character
+  def shift!
+    char = self[0]
+    self[0] = ''
+    char
+  end
+end

data/lib/mosaheh/encoder.rb CHANGED

@@ -1,118 +1,83 @@
 # encoding: UTF-8
 class Mosaheh::Encoder
-  # Arabic UTF-8 Block, from U+0600 to U+06FF seperated using '_*_'
-  AR = '؀_*_؁_*_؂_*_؃_*_؄_*_؅_*_؆_*_؇_*_؈_*_؉_*_؊_*_؋_*_،_*_؍_*_؎_*_؏_*_ؐ_*_ؑ_*_ؒ_*_ؓ_*_ؔ_*_ؕ_*_ؖ_*_ؗ_*_ؘ_*_ؙ_*_ؚ_*_؛_*_؜_*_؝_*_؞_*_؟_*_ؠ_*_ء_*_آ_*_أ_*_ؤ_*_إ_*_ئ_*_ا_*_ب_*_ة_*_ت_*_ث_*_ج_*_ح_*_خ_*_د_*_ذ_*_ر_*_ز_*_س_*_ش_*_ص_*_ض_*_ط_*_ظ_*_ع_*_غ_*_ػ_*_ؼ_*_ؽ_*_ؾ_*_ؿ_*_ـ_*_ف_*_ق_*_ك_*_ل_*_م_*_ن_*_ه_*_و_*_ى_*_ي_*_ً_*_ٌ_*_ٍ_*_َ_*_ُ_*_ِ_*_ّ_*_ْ_*_ٓ_*_ٔ_*_ٕ_*_ٖ_*_ٗ_*_٘_*_ٙ_*_ٚ_*_ٛ_*_ٜ_*_ٝ_*_ٞ_*_ٟ_*_٠_*_١_*_٢_*_٣_*_٤_*_٥_*_٦_*_٧_*_٨_*_٩_*_٪_*_٫_*_٬_*_٭_*_ٮ_*_ٯ_*_ٰ_*_ٱ_*_ٲ_*_ٳ_*_ٴ_*_ٵ_*_ٶ_*_ٷ_*_ٸ_*_ٹ_*_ٺ_*_ٻ_*_ټ_*_ٽ_*_پ_*_ٿ_*_ڀ_*_ځ_*_ڂ_*_ڃ_*_ڄ_*_څ_*_چ_*_ڇ_*_ڈ_*_ډ_*_ڊ_*_ڋ_*_ڌ_*_ڍ_*_ڎ_*_ڏ_*_ڐ_*_ڑ_*_ڒ_*_ړ_*_ڔ_*_ڕ_*_ږ_*_ڗ_*_ژ_*_ڙ_*_ښ_*_ڛ_*_ڜ_*_ڝ_*_ڞ_*_ڟ_*_ڠ_*_ڡ_*_ڢ_*_ڣ_*_ڤ_*_ڥ_*_ڦ_*_ڧ_*_ڨ_*_ک_*_ڪ_*_ګ_*_ڬ_*_ڭ_*_ڮ_*_گ_*_ڰ_*_ڱ_*_ڲ_*_ڳ_*_ڴ_*_ڵ_*_ڶ_*_ڷ_*_ڸ_*_ڹ_*_ں_*_ڻ_*_ڼ_*_ڽ_*_ھ_*_ڿ_*_ۀ_*_ہ_*_ۂ_*_ۃ_*_ۄ_*_ۅ_*_ۆ_*_ۇ_*_ۈ_*_ۉ_*_ۊ_*_ۋ_*_ی_*_ۍ_*_ێ_*_ۏ_*_ې_*_ۑ_*_ے_*_ۓ_*_۔_*_ە_*_ۖ_*_ۗ_*_ۘ_*_ۙ_*_ۚ_*_ۛ_*_ۜ_*_۝_*_۞_*_۟_*_۠_*_ۡ_*_ۢ_*_ۣ_*_ۤ_*_ۥ_*_ۦ_*_ۧ_*_ۨ_*_۩_*_۪_*_۫_*_۬_*_ۭ_*_ۮ_*_ۯ_*_۰_*_۱_*_۲_*_۳_*_۴_*_۵_*_۶_*_۷_*_۸_*_۹_*_ۺ_*_ۻ_*_ۼ_*_۽_*_۾_*_ۿ'
-  # U+0600 to U+06FF encoded using cp1252
-  BROKEN_AR = 'Ø€_*_Ø_*_Ø‚_*_Øƒ_*_Ø„_*_Ø…_*_Ø†_*_Ø‡_*_Øˆ_*_Ø‰_*_ØŠ_*_Ø‹_*_ØŒ_*_Ø_*_ØŽ_*_Ø_*_Ø_*_Ø‘_*_Ø’_*_Ø“_*_Ø”_*_Ø•_*_Ø–_*_Ø—_*_Ø˜_*_Ø™_*_Øš_*_Ø›_*_Øœ_*_Ø_*_Øž_*_ØŸ_*_Ø _*_Ø¡_*_Ø¢_*_Ø£_*_Ø¤_*_Ø¥_*_Ø¦_*_Ø§_*_Ø¨_*_Ø©_*_Øª_*_Ø«_*_Ø¬_*_Ø_*_Ø®_*_Ø¯_*_Ø°_*_Ø±_*_Ø²_*_Ø³_*_Ø´_*_Øµ_*_Ø¶_*_Ø·_*_Ø¸_*_Ø¹_*_Øº_*_Ø»_*_Ø¼_*_Ø½_*_Ø¾_*_Ø¿_*_Ù€_*_Ù_*_Ù‚_*_Ùƒ_*_Ù„_*_Ù…_*_Ù†_*_Ù‡_*_Ùˆ_*_Ù‰_*_ÙŠ_*_Ù‹_*_ÙŒ_*_Ù_*_ÙŽ_*_Ù_*_Ù_*_Ù‘_*_Ù’_*_Ù“_*_Ù”_*_Ù•_*_Ù–_*_Ù—_*_Ù˜_*_Ù™_*_Ùš_*_Ù›_*_Ùœ_*_Ù_*_Ùž_*_ÙŸ_*_Ù _*_Ù¡_*_Ù¢_*_Ù£_*_Ù¤_*_Ù¥_*_Ù¦_*_Ù§_*_Ù¨_*_Ù©_*_Ùª_*_Ù«_*_Ù¬_*_Ù_*_Ù®_*_Ù¯_*_Ù°_*_Ù±_*_Ù²_*_Ù³_*_Ù´_*_Ùµ_*_Ù¶_*_Ù·_*_Ù¸_*_Ù¹_*_Ùº_*_Ù»_*_Ù¼_*_Ù½_*_Ù¾_*_Ù¿_*_Ú€_*_Ú_*_Ú‚_*_Úƒ_*_Ú„_*_Ú…_*_Ú†_*_Ú‡_*_Úˆ_*_Ú‰_*_ÚŠ_*_Ú‹_*_ÚŒ_*_Ú_*_ÚŽ_*_Ú_*_Ú_*_Ú‘_*_Ú’_*_Ú“_*_Ú”_*_Ú•_*_Ú–_*_Ú—_*_Ú˜_*_Ú™_*_Úš_*_Ú›_*_Úœ_*_Ú_*_Úž_*_ÚŸ_*_Ú _*_Ú¡_*_Ú¢_*_Ú£_*_Ú¤_*_Ú¥_*_Ú¦_*_Ú§_*_Ú¨_*_Ú©_*_Úª_*_Ú«_*_Ú¬_*_Ú_*_Ú®_*_Ú¯_*_Ú°_*_Ú±_*_Ú²_*_Ú³_*_Ú´_*_Úµ_*_Ú¶_*_Ú·_*_Ú¸_*_Ú¹_*_Úº_*_Ú»_*_Ú¼_*_Ú½_*_Ú¾_*_Ú¿_*_Û€_*_Û_*_Û‚_*_Ûƒ_*_Û„_*_Û…_*_Û†_*_Û‡_*_Ûˆ_*_Û‰_*_ÛŠ_*_Û‹_*_ÛŒ_*_Û_*_ÛŽ_*_Û_*_Û_*_Û‘_*_Û’_*_Û“_*_Û”_*_Û•_*_Û–_*_Û—_*_Û˜_*_Û™_*_Ûš_*_Û›_*_Ûœ_*_Û_*_Ûž_*_ÛŸ_*_Û _*_Û¡_*_Û¢_*_Û£_*_Û¤_*_Û¥_*_Û¦_*_Û§_*_Û¨_*_Û©_*_Ûª_*_Û«_*_Û¬_*_Û_*_Û®_*_Û¯_*_Û°_*_Û±_*_Û²_*_Û³_*_Û´_*_Ûµ_*_Û¶_*_Û·_*_Û¸_*_Û¹_*_Ûº_*_Û»_*_Û¼_*_Û½_*_Û¾_*_Û¿'
+  # Initialize the encoder
+  def initialize
+    # UTF-8 byte sequences for Arabic always begin with one of (0xD8 - 0xDB)
+    @utf_8_beginning_chars = [*216..219].map(&:chr).join.force_encoding('cp1252')
-  def initialize options = {}
-    @replace_char = options[:replace_char] || '?'
+    # Misencoded sequences can be correctly re-encoded to utf-8, EXCEPT for one
+    # character, which gets replaced with a space (ASCII code 32)!
+    @problem_char = 32.chr.force_encoding('cp1252')
-    generate_mappings_hash
+    # The correct replacement for the problem character
+    @correct_char = 160.chr.force_encoding('cp1252')
   end
-  def show_mappings_hash
-    i = 0
-    justification = 0;
-    mappings = ''
-    ar = AR.split('_*_')
-    @map.each do |broken, good|
-      str = "[#{broken.join(', ')}]"
-      justification = str.length if str.length > justification
-      str = str.ljust justification
-      str += ' => '
-      str += "[#{good.join(', ')}]"
-      str += " # #{ar[i]}\n"
-      mappings += str
-      i += 1
-    end
-    puts mappings
-  end
+  # Repairs Arabic (U+0600 - U+06FF) data
+  # which has been misencoded from cp1252 to UTF-8
+  # although the original data was UTF-8 encoded
+  #
+  # @param [String] Misencoded string
+  # @return [String] Correctly encoded utf-8 string
   def repair(str)
-    @broken = str.unpack('C*')
-    @repaired = []
-    while @broken.length > 0
+    # Data buffers
+    source = str.clone
+    fixed  = ""
+    # Each string needs a new converter instance
+    ec  = Encoding::Converter.new('utf-8', 'cp1252')
+    until source.empty?
-      # Try to use the mappings hash first and jump to the
-      # next sequence if we succeed
-      next if sequence_found_in_map
+      # Don't process correctly UTF-8
+      # encoded Arabic data
+      if is_arabic?(source[0])
+        fixed += source.shift!.force_encoding('cp1252')
+        next
+      end
-      # Try to handle ASCII chars if they are not a part
-      # of a broken sequence
-      next if byte_not_broken
+      state = ec.primitive_convert(source, fixed, nil, nil, Encoding::Converter::AFTER_OUTPUT)
+      # When an undefined sequence is found, we only move the
+      # 2nd byte to the fixed data, as it will be valid in UTF-8.
+      # For example: 129 is undefined in cp1252, but it is in UTF-8.
+      #   If we get a sequence like:
+      #   ec.last_error.error_char.unpack('C*') # => [194, 129]
+      #   We just ignore the 194 and add the 129 to the fixed data
+      if state == :undefined_conversion
+        c = ec.last_error.error_char.unpack('C*')[1].chr
+        fixed += c.force_encoding('cp1252')
+      end
-      handle_unknown_byte
+      # After each byte gets converted, check for the problem character
+      # and replace it if it's found
+      if state == :after_output && ends_with_problem?(fixed)
+        fixed.gsub!(/#{@problem_char}$/, @correct_char)
+      end
     end
-    result = @repaired.pack('C*')
-    @repaired = []
-    result
+    fixed.force_encoding('utf-8')
   end
 private
-  def generate_mappings_hash
-    @map = {}
-    ar = AR.split('_*_')
-    BROKEN_AR.split('_*_').each_with_index do |c, i|
-      @map[ c.unpack('C*') ] = ar[i].unpack('C*')
-    end
-  end
-  def sequence_found_in_map
-    (1..4).each do |i|
-      broken_seq = @broken[0..i]
-      if @map.has_key?(broken_seq)
-        @repaired += @map[broken_seq]
-        @broken.slice!(0, i + 1)
-        return true
-      end
-    end
-    false
+  # Used to test for correctly encoded UTF-8 Arabic
+  #
+  # @param [String] Data to test
+  # @return [Boolean]
+  def is_arabic?(str)
+    str =~ %r{([\u0600-\u06FF])+}u
   end
-  def byte_not_broken
-    if (
-        @broken.first != 195 && # The byte is not the beginning of a broken sequence
-        @broken.first <  256    # One byte char
-       ) || (
-        @broken.first == 195 &&          # The byte is the beginning of a sequence ...
-        !(152..155).include?(@broken[1]) # ... but the next one is not, so it's not a sequence!
-       )
-      # Add the byte to the repaired sequence and remove it from the broken one
-      @repaired << @broken.shift
-    end
-  end
-  def handle_unknown_byte
-    case
-    # Handle the case when the last 2 bytes are the beginning of
-    # a broken sequence but it's not found in the mappings hash.
-    # The best guess is that they're 2 one-byte chars.
-    #
-    # Handles: [195, (152 | 153 | 154 | 155)]
-    when @broken.length == 2
-      @repaired << @broken.first << @broken[1]
-      @broken.slice!(0, 2)
-    # If we are here, then we have no idea what is this byte!
-    # We will just use the replace_char to replace the unknown byte_not_broken
-    # in the repaired sequence
-    else
-      @repaired += @replace_char.bytes.to_a
-    end
+  # Used to check for the problem char in the end
+  # of a given String
+  #
+  # @param [String] Data to test
+  # @return [Boolean]
+  def ends_with_problem?(str)
+    str =~ %r{[#{@utf_8_beginning_chars}]#{@problem_char}$}
   end
 end

data/lib/mosaheh/version.rb CHANGED

@@ -1,4 +1,4 @@
 # encoding: UTF-8
 module Mosaheh
-  VERSION = "0.0.1"
+  VERSION = "0.0.2"
 end

data/mosaheh.gemspec CHANGED

@@ -18,5 +18,10 @@ Gem::Specification.new do |s|
   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
   s.require_paths = ["lib"]
-  s.add_development_dependency "rspec"
+  s.add_development_dependency 'rspec'
+  s.add_development_dependency 'guard'
+  s.add_development_dependency 'guard-rspec'
+  # Linux notifications for guard
+  s.add_development_dependency 'rb-inotify'
+  s.add_development_dependency 'libnotify'
 end

data/spec/lib/mosaheh/core_ext/string_spec.rb ADDED

@@ -0,0 +1,15 @@
+# encoding: UTF-8
+require File.expand_path('../../../../spec_helper', __FILE__)
+describe String, '#shift!' do
+  let(:string) { 'abc' }
+  it 'should remove the first charecter' do
+    string.shift!
+    string.should == 'bc'
+  end
+  it 'should return the removed charecter' do
+    string.shift!.should == 'a'
+  end
+end

data/spec/lib/mosaheh/encoder_spec.rb ADDED

@@ -0,0 +1,25 @@
+# encoding: UTF-8
+require File.expand_path('../../../spec_helper', __FILE__)
+describe Mosaheh::Encoder do
+  let(:encoder) { Mosaheh::Encoder.new }
+  describe '#repair' do
+    it 'should repair the whole Arabic unicode codeblock' do
+      # Arabic: U+0600 - U+06FF
+      good_ar = (0..255).map{|i| eval '"\u06' + ("%02x" %  i) + '"'}.join ' '
+      broken_ar = load_misencoded_ar
+      encoder.repair(broken_ar).should == good_ar
+    end
+    it 'should not change ASCII chars' do
+      text = [*0..127].map(&:chr).join
+      encoder.repair(text).should == text
+    end
+    it 'should not change correctly encoded Arabic chars' do
+      text = 'إختبار'
+      encoder.repair(text).should == text
+    end
+  end
+end

data/spec/misencoded_samples/ar.txt ADDED

@@ -0,0 +1 @@

+ Ø€ Ø Ø‚ Øƒ Ø„ Ø… Ø† Ø‡ Øˆ Ø‰ ØŠ Ø‹ ØŒ Ø ØŽ Ø Ø Ø‘ Ø’ Ø“ Ø” Ø• Ø– Ø— Ø˜ Ø™ Øš Ø› Øœ Ø Øž ØŸ Ø Ø¡ Ø¢ Ø£ Ø¤ Ø¥ Ø¦ Ø§ Ø¨ Ø© Øª Ø« Ø¬ Ø Ø® Ø¯ Ø° Ø± Ø² Ø³ Ø´ Øµ Ø¶ Ø· Ø¸ Ø¹ Øº Ø» Ø¼ Ø½ Ø¾ Ø¿ Ù€ Ù Ù‚ Ùƒ Ù„ Ù… Ù† Ù‡ Ùˆ Ù‰ ÙŠ Ù‹ ÙŒ Ù ÙŽ Ù Ù Ù‘ Ù’ Ù“ Ù” Ù• Ù– Ù— Ù˜ Ù™ Ùš Ù› Ùœ Ù Ùž ÙŸ Ù Ù¡ Ù¢ Ù£ Ù¤ Ù¥ Ù¦ Ù§ Ù¨ Ù© Ùª Ù« Ù¬ Ù Ù® Ù¯ Ù° Ù± Ù² Ù³ Ù´ Ùµ Ù¶ Ù· Ù¸ Ù¹ Ùº Ù» Ù¼ Ù½ Ù¾ Ù¿ Ú€ Ú Ú‚ Úƒ Ú„ Ú… Ú† Ú‡ Úˆ Ú‰ ÚŠ Ú‹ ÚŒ Ú ÚŽ Ú Ú Ú‘ Ú’ Ú“ Ú” Ú• Ú– Ú— Ú˜ Ú™ Úš Ú› Úœ Ú Úž ÚŸ Ú Ú¡ Ú¢ Ú£ Ú¤ Ú¥ Ú¦ Ú§ Ú¨ Ú© Úª Ú« Ú¬ Ú Ú® Ú¯ Ú° Ú± Ú² Ú³ Ú´ Úµ Ú¶ Ú· Ú¸ Ú¹ Úº Ú» Ú¼ Ú½ Ú¾ Ú¿ Û€ Û Û‚ Ûƒ Û„ Û… Û† Û‡ Ûˆ Û‰ ÛŠ Û‹ ÛŒ Û ÛŽ Û Û Û‘ Û’ Û“ Û” Û• Û– Û— Û˜ Û™ Ûš Û› Ûœ Û Ûž ÛŸ Û Û¡ Û¢ Û£ Û¤ Û¥ Û¦ Û§ Û¨ Û© Ûª Û« Û¬ Û Û® Û¯ Û° Û± Û² Û³ Û´ Ûµ Û¶ Û· Û¸ Û¹ Ûº Û» Û¼ Û½ Û¾ Û¿

data/spec/spec_helper.rb CHANGED

@@ -7,3 +7,11 @@ path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
 $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
 require 'mosaheh'
+def load_misencoded_sample(filename)
+  File.read("#{File.dirname(__FILE__)}/misencoded_samples/#{filename}").chomp
+end
+def load_misencoded_ar
+  load_misencoded_sample('ar.txt')
+end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: mosaheh
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-10-31 00:00:00.000000000Z
+date: 2011-11-05 00:00:00.000000000Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
-  requirement: &21735460 !ruby/object:Gem::Requirement
+  requirement: &18315980 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,7 +21,51 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *21735460
+  version_requirements: *18315980
+- !ruby/object:Gem::Dependency
+  name: guard
+  requirement: &18315480 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *18315480
+- !ruby/object:Gem::Dependency
+  name: guard-rspec
+  requirement: &18314980 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *18314980
+- !ruby/object:Gem::Dependency
+  name: rb-inotify
+  requirement: &18314480 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *18314480
+- !ruby/object:Gem::Dependency
+  name: libnotify
+  requirement: &18313920 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *18313920
 description: Mosaheh repairs UTF-8 Arabic (U+0600 - U+06FF) text which has been mistakenly
   saved as single-byte latin1 encoding (cp1252). The biggest usecase for it is to
   repair Arabic data stored in MySql databases with the wrong encoding.
@@ -33,14 +77,19 @@ extra_rdoc_files: []
 files:
 - .gitignore
 - Gemfile
+- Guardfile
 - README.md
 - Rakefile
 - lib/mosaheh.rb
+- lib/mosaheh/core_ext.rb
+- lib/mosaheh/core_ext/string.rb
 - lib/mosaheh/encoder.rb
 - lib/mosaheh/version.rb
 - mosaheh.gemspec
 - spec/.rspec
-- spec/mosaheh/encoder_spec.rb
+- spec/lib/mosaheh/core_ext/string_spec.rb
+- spec/lib/mosaheh/encoder_spec.rb
+- spec/misencoded_samples/ar.txt
 - spec/spec_helper.rb
 homepage: ''
 licenses: []

data/spec/mosaheh/encoder_spec.rb DELETED

@@ -1,17 +0,0 @@
-# encoding: UTF-8
-require File.expand_path('../../spec_helper', __FILE__)
-describe Mosaheh::Encoder do
-  let(:encoder) { Mosaheh::Encoder.new }
-  describe '#repair' do
-    it 'should repair the whole Arabic unicode codeblock' do
-      encoder.repair(Mosaheh::Encoder::BROKEN_AR).unpack('C*').should eq Mosaheh::Encoder::AR.unpack('C*')
-    end
-    it 'should not change ASCII chars' do
-      text = [*0..127].map(&:chr).join
-      encoder.repair(text).should == text
-    end
-  end
-end