RubyGems - mosaheh - Versions diffs - 0.0.1 → 0.0.2 - Mend

mosaheh 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

data/Guardfile +5 -0
data/lib/mosaheh.rb +2 -0
data/lib/mosaheh/core_ext.rb +3 -0
data/lib/mosaheh/core_ext/string.rb +17 -0
data/lib/mosaheh/encoder.rb +62 -97
data/lib/mosaheh/version.rb +1 -1
data/mosaheh.gemspec +6 -1
data/spec/lib/mosaheh/core_ext/string_spec.rb +15 -0
data/spec/lib/mosaheh/encoder_spec.rb +25 -0
data/spec/misencoded_samples/ar.txt +1 -0
data/spec/spec_helper.rb +8 -0
metadata +54 -5
data/spec/mosaheh/encoder_spec.rb +0 -17

data/Guardfile ADDED

@@ -0,0 +1,5 @@
+guard 'rspec', :cli => '--color --format nested' do
+  watch(%r{^spec/.+_spec\.rb$})
+  watch(%r{^lib/(.+)\.rb$})     { |m| "spec/lib/#{m[1]}_spec.rb" }
+  watch('spec/spec_helper.rb')  { "spec" }
+end

data/lib/mosaheh.rb CHANGED

@@ -1,6 +1,8 @@
 # encoding: UTF-8
 require "mosaheh/version"
+require 'mosaheh/core_ext'
 module Mosaheh
   autoload :Encoder, 'mosaheh/encoder'
 end

data/lib/mosaheh/core_ext.rb ADDED

@@ -0,0 +1,3 @@
+Dir["#{File.dirname(__FILE__)}/core_ext/*.rb"].each do |path|
+  require "mosaheh/core_ext/#{File.basename(path, '.rb')}"
+end

data/lib/mosaheh/core_ext/string.rb ADDED

@@ -0,0 +1,17 @@
+# encoding: utf-8
+class String
+  # Removes the first character from the String
+  # and returns it
+  #
+  # @example
+  #   str = 'abc'
+  #   str.shift! # => 'a'
+  #   p str      # => 'bc'
+  # @return [String] The removed first character
+  def shift!
+    char = self[0]
+    self[0] = ''
+    char
+  end
+end

data/lib/mosaheh/encoder.rb CHANGED

@@ -1,118 +1,83 @@
 # encoding: UTF-8
 class Mosaheh::Encoder
-  # Arabic UTF-8 Block, from U+0600 to U+06FF seperated using '_*_'
-  AR = '؀_*_؁_*_؂_*_؃_*_؄_*_؅_*_؆_*_؇_*_؈_*_؉_*_؊_*_؋_*_،_*_؍_*_؎_*_؏_*_ؐ_*_ؑ_*_ؒ_*_ؓ_*_ؔ_*_ؕ_*_ؖ_*_ؗ_*_ؘ_*_ؙ_*_ؚ_*_؛_*_؜_*_؝_*_؞_*_؟_*_ؠ_*_ء_*_آ_*_أ_*_ؤ_*_إ_*_ئ_*_ا_*_ب_*_ة_*_ت_*_ث_*_ج_*_ح_*_خ_*_د_*_ذ_*_ر_*_ز_*_س_*_ش_*_ص_*_ض_*_ط_*_ظ_*_ع_*_غ_*_ػ_*_ؼ_*_ؽ_*_ؾ_*_ؿ_*_ـ_*_ف_*_ق_*_ك_*_ل_*_م_*_ن_*_ه_*_و_*_ى_*_ي_*_ً_*_ٌ_*_ٍ_*_َ_*_ُ_*_ِ_*_ّ_*_ْ_*_ٓ_*_ٔ_*_ٕ_*_ٖ_*_ٗ_*_٘_*_ٙ_*_ٚ_*_ٛ_*_ٜ_*_ٝ_*_ٞ_*_ٟ_*_٠_*_١_*_٢_*_٣_*_٤_*_٥_*_٦_*_٧_*_٨_*_٩_*_٪_*_٫_*_٬_*_٭_*_ٮ_*_ٯ_*_ٰ_*_ٱ_*_ٲ_*_ٳ_*_ٴ_*_ٵ_*_ٶ_*_ٷ_*_ٸ_*_ٹ_*_ٺ_*_ٻ_*_ټ_*_ٽ_*_پ_*_ٿ_*_ڀ_*_ځ_*_ڂ_*_ڃ_*_ڄ_*_څ_*_چ_*_ڇ_*_ڈ_*_ډ_*_ڊ_*_ڋ_*_ڌ_*_ڍ_*_ڎ_*_ڏ_*_ڐ_*_ڑ_*_ڒ_*_ړ_*_ڔ_*_ڕ_*_ږ_*_ڗ_*_ژ_*_ڙ_*_ښ_*_ڛ_*_ڜ_*_ڝ_*_ڞ_*_ڟ_*_ڠ_*_ڡ_*_ڢ_*_ڣ_*_ڤ_*_ڥ_*_ڦ_*_ڧ_*_ڨ_*_ک_*_ڪ_*_ګ_*_ڬ_*_ڭ_*_ڮ_*_گ_*_ڰ_*_ڱ_*_ڲ_*_ڳ_*_ڴ_*_ڵ_*_ڶ_*_ڷ_*_ڸ_*_ڹ_*_ں_*_ڻ_*_ڼ_*_ڽ_*_ھ_*_ڿ_*_ۀ_*_ہ_*_ۂ_*_ۃ_*_ۄ_*_ۅ_*_ۆ_*_ۇ_*_ۈ_*_ۉ_*_ۊ_*_ۋ_*_ی_*_ۍ_*_ێ_*_ۏ_*_ې_*_ۑ_*_ے_*_ۓ_*_۔_*_ە_*_ۖ_*_ۗ_*_ۘ_*_ۙ_*_ۚ_*_ۛ_*_ۜ_*_۝_*_۞_*_۟_*_۠_*_ۡ_*_ۢ_*_ۣ_*_ۤ_*_ۥ_*_ۦ_*_ۧ_*_ۨ_*_۩_*_۪_*_۫_*_۬_*_ۭ_*_ۮ_*_ۯ_*_۰_*_۱_*_۲_*_۳_*_۴_*_۵_*_۶_*_۷_*_۸_*_۹_*_ۺ_*_ۻ_*_ۼ_*_۽_*_۾_*_ۿ'
-  # U+0600 to U+06FF encoded using cp1252
-  BROKEN_AR = 'Ø€_*_Ø_*_Ø‚_*_Øƒ_*_Ø„_*_Ø…_*_Ø†_*_Ø‡_*_Øˆ_*_Ø‰_*_ØŠ_*_Ø‹_*_ØŒ_*_Ø_*_ØŽ_*_Ø_*_Ø_*_Ø‘_*_Ø’_*_Ø“_*_Ø”_*_Ø•_*_Ø–_*_Ø—_*_Ø˜_*_Ø™_*_Øš_*_Ø›_*_Øœ_*_Ø_*_Øž_*_ØŸ_*_Ø _*_Ø¡_*_Ø¢_*_Ø£_*_Ø¤_*_Ø¥_*_Ø¦_*_Ø§_*_Ø¨_*_Ø©_*_Øª_*_Ø«_*_Ø¬_*_Ø_*_Ø®_*_Ø¯_*_Ø°_*_Ø±_*_Ø²_*_Ø³_*_Ø´_*_Øµ_*_Ø¶_*_Ø·_*_Ø¸_*_Ø¹_*_Øº_*_Ø»_*_Ø¼_*_Ø½_*_Ø¾_*_Ø¿_*_Ù€_*_Ù_*_Ù‚_*_Ùƒ_*_Ù„_*_Ù…_*_Ù†_*_Ù‡_*_Ùˆ_*_Ù‰_*_ÙŠ_*_Ù‹_*_ÙŒ_*_Ù_*_ÙŽ_*_Ù_*_Ù_*_Ù‘_*_Ù’_*_Ù“_*_Ù”_*_Ù•_*_Ù–_*_Ù—_*_Ù˜_*_Ù™_*_Ùš_*_Ù›_*_Ùœ_*_Ù_*_Ùž_*_ÙŸ_*_Ù _*_Ù¡_*_Ù¢_*_Ù£_*_Ù¤_*_Ù¥_*_Ù¦_*_Ù§_*_Ù¨_*_Ù©_*_Ùª_*_Ù«_*_Ù¬_*_Ù_*_Ù®_*_Ù¯_*_Ù°_*_Ù±_*_Ù²_*_Ù³_*_Ù´_*_Ùµ_*_Ù¶_*_Ù·_*_Ù¸_*_Ù¹_*_Ùº_*_Ù»_*_Ù¼_*_Ù½_*_Ù¾_*_Ù¿_*_Ú€_*_Ú_*_Ú‚_*_Úƒ_*_Ú„_*_Ú…_*_Ú†_*_Ú‡_*_Úˆ_*_Ú‰_*_ÚŠ_*_Ú‹_*_ÚŒ_*_Ú_*_ÚŽ_*_Ú_*_Ú_*_Ú‘_*_Ú’_*_Ú“_*_Ú”_*_Ú•_*_Ú–_*_Ú—_*_Ú˜_*_Ú™_*_Úš_*_Ú›_*_Úœ_*_Ú_*_Úž_*_ÚŸ_*_Ú _*_Ú¡_*_Ú¢_*_Ú£_*_Ú¤_*_Ú¥_*_Ú¦_*_Ú§_*_Ú¨_*_Ú©_*_Úª_*_Ú«_*_Ú¬_*_Ú_*_Ú®_*_Ú¯_*_Ú°_*_Ú±_*_Ú²_*_Ú³_*_Ú´_*_Úµ_*_Ú¶_*_Ú·_*_Ú¸_*_Ú¹_*_Úº_*_Ú»_*_Ú¼_*_Ú½_*_Ú¾_*_Ú¿_*_Û€_*_Û_*_Û‚_*_Ûƒ_*_Û„_*_Û…_*_Û†_*_Û‡_*_Ûˆ_*_Û‰_*_ÛŠ_*_Û‹_*_ÛŒ_*_Û_*_ÛŽ_*_Û_*_Û_*_Û‘_*_Û’_*_Û“_*_Û”_*_Û•_*_Û–_*_Û—_*_Û˜_*_Û™_*_Ûš_*_Û›_*_Ûœ_*_Û_*_Ûž_*_ÛŸ_*_Û _*_Û¡_*_Û¢_*_Û£_*_Û¤_*_Û¥_*_Û¦_*_Û§_*_Û¨_*_Û©_*_Ûª_*_Û«_*_Û¬_*_Û_*_Û®_*_Û¯_*_Û°_*_Û±_*_Û²_*_Û³_*_Û´_*_Ûµ_*_Û¶_*_Û·_*_Û¸_*_Û¹_*_Ûº_*_Û»_*_Û¼_*_Û½_*_Û¾_*_Û¿'
+  # Initialize the encoder
+  def initialize
+    # UTF-8 bytes-sequences always begin with one of (0xD8 - 0xDB) for Arabic
+    @utf_8_beginning_chars = [*216..219].map(&:chr).join.force_encoding('cp1252')
-  def initialize options = {}
-    @replace_char = options[:replace_char] || '?'
+    # Misencoded sequences can be correctly re-encoded to UTF-8, EXCEPT for one
+    # character, which gets replaced with a space (ASCII code 32)!
+    @problem_char = 32.chr.force_encoding('cp1252')
-    generate_mappings_hash
+    # The correct replacement for the problem character
+    @correct_char = 160.chr.force_encoding('cp1252')
   end
-  def show_mappings_hash
-    i = 0
-    justification = 0;
-    mappings = ''
-    ar = AR.split('_*_')
-    @map.each do |broken, good|
-      str = "[#{broken.join(', ')}]"
-      justification = str.length if str.length > justification
-      str = str.ljust justification
-      str += ' => '
-      str += "[#{good.join(', ')}]"
-      str += " # #{ar[i]}\n"
-      mappings += str
-      i += 1
-    end
-    puts mappings
-  end
+  # Repairs Arabic (U+0600 - U+06FF) data whose original UTF-8 bytes
+  # were mistakenly read as cp1252 and then re-encoded to UTF-8
+  # (i.e. double-encoded text)
+  #
+  # @param str [String] Misencoded string
+  # @return [String] Correctly encoded utf-8 string
   def repair(str)
-    @broken = str.unpack('C*')
-    @repaired = []
-    while @broken.length > 0
+    # Data buffers
+    source = str.clone
+    fixed  = ""
+    # Each string needs a new converter instance
+    ec  = Encoding::Converter.new('utf-8', 'cp1252')
+    until source.empty?
-      # Try to use the mappings hash first and jump to the
-      # next sequence if we succeed
-      next if sequence_found_in_map
+      # Don't process correctly UTF-8
+      # encoded Arabic data
+      if is_arabic?(source[0])
+        fixed += source.shift!.force_encoding('cp1252')
+        next
+      end
-      # Try to handle ASCII chars if they are not a part
-      # of a broken sequence
-      next if byte_not_broken
+      state = ec.primitive_convert(source, fixed, nil, nil, Encoding::Converter::AFTER_OUTPUT)
+      # When an undefined sequence is found, we only move the
+      # 2nd byte to the fixed data, as it will be valid in UTF-8.
+      # For example: 129 is undefined in cp1252, but it is in UTF-8.
+      #   If we get a sequence like:
+      #   ec.last_error.error_char.unpack('C*') # => [194, 129]
+      #   We just ignore the 194 and add the 129 to the fixed data
+      if state == :undefined_conversion
+        c = ec.last_error.error_char.unpack('C*')[1].chr
+        fixed += c.force_encoding('cp1252')
+      end
-      handle_unknown_byte
+      # After each byte gets converted, check for the problem character
+      # and replace it if it's found
+      if state == :after_output && ends_with_problem?(fixed)
+        fixed.gsub!(/#{@problem_char}$/, @correct_char)
+      end
     end
-    result = @repaired.pack('C*')
-    @repaired = []
-    result
+    fixed.force_encoding('utf-8')
   end
 private
-  def generate_mappings_hash
-    @map = {}
-    ar = AR.split('_*_')
-    BROKEN_AR.split('_*_').each_with_index do |c, i|
-      @map[ c.unpack('C*') ] = ar[i].unpack('C*')
-    end
-  end
-  def sequence_found_in_map
-    (1..4).each do |i|
-      broken_seq = @broken[0..i]
-      if @map.has_key?(broken_seq)
-        @repaired += @map[broken_seq]
-        @broken.slice!(0, i + 1)
-        return true
-      end
-    end
-    false
+  # Used to test for correctly encoded UTF-8 Arabic
+  #
+  # @param str [String] Data to test
+  # @return [Boolean]
+  def is_arabic?(str)
+    str =~ %r{([\u0600-\u06FF])+}u
   end
-  def byte_not_broken
-    if (
-        @broken.first != 195 && # The byte is not the beginning of a broken sequence
-        @broken.first <  256    # One byte char
-       ) || (
-        @broken.first == 195 &&          # The byte is the beginning of a sequence ...
-        !(152..155).include?(@broken[1]) # ... but the next one is not, so it's not a sequence!
-       )
-      # Add the byte to the repaired sequence and remove it from the broken one
-      @repaired << @broken.shift
-    end
-  end
-  def handle_unknown_byte
-    case
-    # Handle the case when the last 2 bytes are the beginning of
-    # a broken sequence but it's not found in the mappings hash.
-    # The best guess is that they're 2 one-byte chars.
-    #
-    # Handles: [195, (152 | 153 | 154 | 155)]
-    when @broken.length == 2
-      @repaired << @broken.first << @broken[1]
-      @broken.slice!(0, 2)
-    # If we are here, then we have no idea what is this byte!
-    # We will just use the replace_char to replace the unknown byte_not_broken
-    # in the repaired sequence
-    else
-      @repaired += @replace_char.bytes.to_a
-    end
+  # Used to check for the problem char at the end
+  # of a given String
+  #
+  # @param str [String] Data to test
+  # @return [Boolean]
+  def ends_with_problem?(str)
+    str =~ %r{[#{@utf_8_beginning_chars}]#{@problem_char}$}
   end
 end

data/lib/mosaheh/version.rb CHANGED

@@ -1,4 +1,4 @@
 # encoding: UTF-8
 module Mosaheh
-  VERSION = "0.0.1"
+  VERSION = "0.0.2"
 end

data/mosaheh.gemspec CHANGED

@@ -18,5 +18,10 @@ Gem::Specification.new do |s|
   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
   s.require_paths = ["lib"]
-  s.add_development_dependency "rspec"
+  s.add_development_dependency 'rspec'
+  s.add_development_dependency 'guard'
+  s.add_development_dependency 'guard-rspec'
+  # Linux notifications for guard
+  s.add_development_dependency 'rb-inotify'
+  s.add_development_dependency 'libnotify'
 end

data/spec/lib/mosaheh/core_ext/string_spec.rb ADDED

@@ -0,0 +1,15 @@
+# encoding: UTF-8
+require File.expand_path('../../../../spec_helper', __FILE__)
+describe String, '#shift!' do
+  let(:string) { 'abc' }
+  it 'should remove the first charecter' do
+    string.shift!
+    string.should == 'bc'
+  end
+  it 'should return the removed charecter' do
+    string.shift!.should == 'a'
+  end
+end

data/spec/lib/mosaheh/encoder_spec.rb ADDED

@@ -0,0 +1,25 @@
+# encoding: UTF-8
+require File.expand_path('../../../spec_helper', __FILE__)
+describe Mosaheh::Encoder do
+  let(:encoder) { Mosaheh::Encoder.new }
+  describe '#repair' do
+    it 'should repair the whole Arabic unicode codeblock' do
+      # Arabic: U+0600 - U+06FF
+      good_ar = (0..255).map{|i| eval '"\u06' + ("%02x" %  i) + '"'}.join ' '
+      broken_ar = load_misencoded_ar
+      encoder.repair(broken_ar).should == good_ar
+    end
+    it 'should not change ASCII chars' do
+      text = [*0..127].map(&:chr).join
+      encoder.repair(text).should == text
+    end
+    it 'should not change correctly encoded Arabic chars' do
+      text = 'إختبار'
+      encoder.repair(text).should == text
+    end
+  end
+end

data/spec/misencoded_samples/ar.txt ADDED

@@ -0,0 +1 @@

+ Ø€ Ø Ø‚ Øƒ Ø„ Ø… Ø† Ø‡ Øˆ Ø‰ ØŠ Ø‹ ØŒ Ø ØŽ Ø Ø Ø‘ Ø’ Ø“ Ø” Ø• Ø– Ø— Ø˜ Ø™ Øš Ø› Øœ Ø Øž ØŸ Ø Ø¡ Ø¢ Ø£ Ø¤ Ø¥ Ø¦ Ø§ Ø¨ Ø© Øª Ø« Ø¬ Ø Ø® Ø¯ Ø° Ø± Ø² Ø³ Ø´ Øµ Ø¶ Ø· Ø¸ Ø¹ Øº Ø» Ø¼ Ø½ Ø¾ Ø¿ Ù€ Ù Ù‚ Ùƒ Ù„ Ù… Ù† Ù‡ Ùˆ Ù‰ ÙŠ Ù‹ ÙŒ Ù ÙŽ Ù Ù Ù‘ Ù’ Ù“ Ù” Ù• Ù– Ù— Ù˜ Ù™ Ùš Ù› Ùœ Ù Ùž ÙŸ Ù Ù¡ Ù¢ Ù£ Ù¤ Ù¥ Ù¦ Ù§ Ù¨ Ù© Ùª Ù« Ù¬ Ù Ù® Ù¯ Ù° Ù± Ù² Ù³ Ù´ Ùµ Ù¶ Ù· Ù¸ Ù¹ Ùº Ù» Ù¼ Ù½ Ù¾ Ù¿ Ú€ Ú Ú‚ Úƒ Ú„ Ú… Ú† Ú‡ Úˆ Ú‰ ÚŠ Ú‹ ÚŒ Ú ÚŽ Ú Ú Ú‘ Ú’ Ú“ Ú” Ú• Ú– Ú— Ú˜ Ú™ Úš Ú› Úœ Ú Úž ÚŸ Ú Ú¡ Ú¢ Ú£ Ú¤ Ú¥ Ú¦ Ú§ Ú¨ Ú© Úª Ú« Ú¬ Ú Ú® Ú¯ Ú° Ú± Ú² Ú³ Ú´ Úµ Ú¶ Ú· Ú¸ Ú¹ Úº Ú» Ú¼ Ú½ Ú¾ Ú¿ Û€ Û Û‚ Ûƒ Û„ Û… Û† Û‡ Ûˆ Û‰ ÛŠ Û‹ ÛŒ Û ÛŽ Û Û Û‘ Û’ Û“ Û” Û• Û– Û— Û˜ Û™ Ûš Û› Ûœ Û Ûž ÛŸ Û Û¡ Û¢ Û£ Û¤ Û¥ Û¦ Û§ Û¨ Û© Ûª Û« Û¬ Û Û® Û¯ Û° Û± Û² Û³ Û´ Ûµ Û¶ Û· Û¸ Û¹ Ûº Û» Û¼ Û½ Û¾ Û¿

data/spec/spec_helper.rb CHANGED

@@ -7,3 +7,11 @@ path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
 $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
 require 'mosaheh'
+def load_misencoded_sample(filename)
+  File.read("#{File.dirname(__FILE__)}/misencoded_samples/#{filename}").chomp
+end
+def load_misencoded_ar
+  load_misencoded_sample('ar.txt')
+end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: mosaheh
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-10-31 00:00:00.000000000Z
+date: 2011-11-05 00:00:00.000000000Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
-  requirement: &21735460 !ruby/object:Gem::Requirement
+  requirement: &18315980 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,7 +21,51 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *21735460
+  version_requirements: *18315980
+- !ruby/object:Gem::Dependency
+  name: guard
+  requirement: &18315480 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *18315480
+- !ruby/object:Gem::Dependency
+  name: guard-rspec
+  requirement: &18314980 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *18314980
+- !ruby/object:Gem::Dependency
+  name: rb-inotify
+  requirement: &18314480 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *18314480
+- !ruby/object:Gem::Dependency
+  name: libnotify
+  requirement: &18313920 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *18313920
 description: Mosaheh repairs UTF-8 Arabic (U+0600 - U+06FF) text which has been mistakenly
   saved as single-byte latin1 encoding (cp1252). The biggest usecase for it is to
   repair Arabic data stored in MySql databases with the wrong encoding.
@@ -33,14 +77,19 @@ extra_rdoc_files: []
 files:
 - .gitignore
 - Gemfile
+- Guardfile
 - README.md
 - Rakefile
 - lib/mosaheh.rb
+- lib/mosaheh/core_ext.rb
+- lib/mosaheh/core_ext/string.rb
 - lib/mosaheh/encoder.rb
 - lib/mosaheh/version.rb
 - mosaheh.gemspec
 - spec/.rspec
-- spec/mosaheh/encoder_spec.rb
+- spec/lib/mosaheh/core_ext/string_spec.rb
+- spec/lib/mosaheh/encoder_spec.rb
+- spec/misencoded_samples/ar.txt
 - spec/spec_helper.rb
 homepage: ''
 licenses: []

data/spec/mosaheh/encoder_spec.rb DELETED

@@ -1,17 +0,0 @@
-# encoding: UTF-8
-require File.expand_path('../../spec_helper', __FILE__)
-describe Mosaheh::Encoder do
-  let(:encoder) { Mosaheh::Encoder.new }
-  describe '#repair' do
-    it 'should repair the whole Arabic unicode codeblock' do
-      encoder.repair(Mosaheh::Encoder::BROKEN_AR).unpack('C*').should eq Mosaheh::Encoder::AR.unpack('C*')
-    end
-    it 'should not change ASCII chars' do
-      text = [*0..127].map(&:chr).join
-      encoder.repair(text).should == text
-    end
-  end
-end