mosaheh 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ guard 'rspec', :cli => '--color --format nested' do
2
+ watch(%r{^spec/.+_spec\.rb$})
3
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
4
+ watch('spec/spec_helper.rb') { "spec" }
5
+ end
@@ -1,6 +1,8 @@
1
1
  # encoding: UTF-8
2
2
  require "mosaheh/version"
3
3
 
4
+ require 'mosaheh/core_ext'
5
+
4
6
  module Mosaheh
5
7
  autoload :Encoder, 'mosaheh/encoder'
6
8
  end
@@ -0,0 +1,3 @@
1
+ Dir["#{File.dirname(__FILE__)}/core_ext/*.rb"].each do |path|
2
+ require "mosaheh/core_ext/#{File.basename(path, '.rb')}"
3
+ end
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
+ class String
3
+
4
+ # Removes the first character from the String
5
+ # and returns it back
6
+ #
7
+ # @example
8
+ # str = 'abc'
9
+ # str.shift! # => 'a'
10
+ # p str # => 'bc'
11
+ # @return [String] The removed first character
12
+ def shift!
13
+ char = self[0]
14
+ self[0] = ''
15
+ char
16
+ end
17
+ end
@@ -1,118 +1,83 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  class Mosaheh::Encoder
4
-
5
- # Arabic UTF-8 Block, from U+0600 to U+06FF seperated using '_*_'
6
- AR = '؀_*_؁_*_؂_*_؃_*_؄_*_؅_*_؆_*_؇_*_؈_*_؉_*_؊_*_؋_*_،_*_؍_*_؎_*_؏_*_ؐ_*_ؑ_*_ؒ_*_ؓ_*_ؔ_*_ؕ_*_ؖ_*_ؗ_*_ؘ_*_ؙ_*_ؚ_*_؛_*_؜_*_؝_*_؞_*_؟_*_ؠ_*_ء_*_آ_*_أ_*_ؤ_*_إ_*_ئ_*_ا_*_ب_*_ة_*_ت_*_ث_*_ج_*_ح_*_خ_*_د_*_ذ_*_ر_*_ز_*_س_*_ش_*_ص_*_ض_*_ط_*_ظ_*_ع_*_غ_*_ػ_*_ؼ_*_ؽ_*_ؾ_*_ؿ_*_ـ_*_ف_*_ق_*_ك_*_ل_*_م_*_ن_*_ه_*_و_*_ى_*_ي_*_ً_*_ٌ_*_ٍ_*_َ_*_ُ_*_ِ_*_ّ_*_ْ_*_ٓ_*_ٔ_*_ٕ_*_ٖ_*_ٗ_*_٘_*_ٙ_*_ٚ_*_ٛ_*_ٜ_*_ٝ_*_ٞ_*_ٟ_*_٠_*_١_*_٢_*_٣_*_٤_*_٥_*_٦_*_٧_*_٨_*_٩_*_٪_*_٫_*_٬_*_٭_*_ٮ_*_ٯ_*_ٰ_*_ٱ_*_ٲ_*_ٳ_*_ٴ_*_ٵ_*_ٶ_*_ٷ_*_ٸ_*_ٹ_*_ٺ_*_ٻ_*_ټ_*_ٽ_*_پ_*_ٿ_*_ڀ_*_ځ_*_ڂ_*_ڃ_*_ڄ_*_څ_*_چ_*_ڇ_*_ڈ_*_ډ_*_ڊ_*_ڋ_*_ڌ_*_ڍ_*_ڎ_*_ڏ_*_ڐ_*_ڑ_*_ڒ_*_ړ_*_ڔ_*_ڕ_*_ږ_*_ڗ_*_ژ_*_ڙ_*_ښ_*_ڛ_*_ڜ_*_ڝ_*_ڞ_*_ڟ_*_ڠ_*_ڡ_*_ڢ_*_ڣ_*_ڤ_*_ڥ_*_ڦ_*_ڧ_*_ڨ_*_ک_*_ڪ_*_ګ_*_ڬ_*_ڭ_*_ڮ_*_گ_*_ڰ_*_ڱ_*_ڲ_*_ڳ_*_ڴ_*_ڵ_*_ڶ_*_ڷ_*_ڸ_*_ڹ_*_ں_*_ڻ_*_ڼ_*_ڽ_*_ھ_*_ڿ_*_ۀ_*_ہ_*_ۂ_*_ۃ_*_ۄ_*_ۅ_*_ۆ_*_ۇ_*_ۈ_*_ۉ_*_ۊ_*_ۋ_*_ی_*_ۍ_*_ێ_*_ۏ_*_ې_*_ۑ_*_ے_*_ۓ_*_۔_*_ە_*_ۖ_*_ۗ_*_ۘ_*_ۙ_*_ۚ_*_ۛ_*_ۜ_*_۝_*_۞_*_۟_*_۠_*_ۡ_*_ۢ_*_ۣ_*_ۤ_*_ۥ_*_ۦ_*_ۧ_*_ۨ_*_۩_*_۪_*_۫_*_۬_*_ۭ_*_ۮ_*_ۯ_*_۰_*_۱_*_۲_*_۳_*_۴_*_۵_*_۶_*_۷_*_۸_*_۹_*_ۺ_*_ۻ_*_ۼ_*_۽_*_۾_*_ۿ'
7
4
 
8
- # U+0600 to U+06FF encoded using cp1252
9
- BROKEN_AR = 'Ø€_*_؁_*_Ø‚_*_؃_*_Ø„_*_Ø…_*_؆_*_؇_*_؈_*_؉_*_ØŠ_*_Ø‹_*_ØŒ_*_؍_*_ØŽ_*_؏_*_ؐ_*_Ø‘_*_Ø’_*_Ø“_*_Ø”_*_Ø•_*_Ø–_*_Ø—_*_ؘ_*_Ø™_*_Øš_*_Ø›_*_Øœ_*_؝_*_Øž_*_ØŸ_*_Ø _*_Ø¡_*_Ø¢_*_Ø£_*_ؤ_*_Ø¥_*_ئ_*_ا_*_ب_*_Ø©_*_ت_*_Ø«_*_ج_*_Ø­_*_Ø®_*_د_*_ذ_*_ر_*_ز_*_س_*_Ø´_*_ص_*_ض_*_Ø·_*_ظ_*_ع_*_غ_*_Ø»_*_ؼ_*_ؽ_*_ؾ_*_Ø¿_*_Ù€_*_ف_*_Ù‚_*_Ùƒ_*_Ù„_*_Ù…_*_Ù†_*_Ù‡_*_Ùˆ_*_Ù‰_*_ÙŠ_*_Ù‹_*_ÙŒ_*_ٍ_*_ÙŽ_*_ُ_*_ِ_*_Ù‘_*_Ù’_*_Ù“_*_Ù”_*_Ù•_*_Ù–_*_Ù—_*_Ù˜_*_Ù™_*_Ùš_*_Ù›_*_Ùœ_*_ٝ_*_Ùž_*_ÙŸ_*_Ù _*_Ù¡_*_Ù¢_*_Ù£_*_Ù¤_*_Ù¥_*_Ù¦_*_Ù§_*_Ù¨_*_Ù©_*_Ùª_*_Ù«_*_Ù¬_*_Ù­_*_Ù®_*_Ù¯_*_Ù°_*_Ù±_*_Ù²_*_Ù³_*_Ù´_*_Ùµ_*_Ù¶_*_Ù·_*_Ù¸_*_Ù¹_*_Ùº_*_Ù»_*_Ù¼_*_Ù½_*_Ù¾_*_Ù¿_*_Ú€_*_ځ_*_Ú‚_*_Úƒ_*_Ú„_*_Ú…_*_Ú†_*_Ú‡_*_Úˆ_*_Ú‰_*_ÚŠ_*_Ú‹_*_ÚŒ_*_ڍ_*_ÚŽ_*_ڏ_*_ڐ_*_Ú‘_*_Ú’_*_Ú“_*_Ú”_*_Ú•_*_Ú–_*_Ú—_*_Ú˜_*_Ú™_*_Úš_*_Ú›_*_Úœ_*_ڝ_*_Úž_*_ÚŸ_*_Ú _*_Ú¡_*_Ú¢_*_Ú£_*_Ú¤_*_Ú¥_*_Ú¦_*_Ú§_*_Ú¨_*_Ú©_*_Úª_*_Ú«_*_Ú¬_*_Ú­_*_Ú®_*_Ú¯_*_Ú°_*_Ú±_*_Ú²_*_Ú³_*_Ú´_*_Úµ_*_Ú¶_*_Ú·_*_Ú¸_*_Ú¹_*_Úº_*_Ú»_*_Ú¼_*_Ú½_*_Ú¾_*_Ú¿_*_Û€_*_ہ_*_Û‚_*_Ûƒ_*_Û„_*_Û…_*_Û†_*_Û‡_*_Ûˆ_*_Û‰_*_ÛŠ_*_Û‹_*_ÛŒ_*_ۍ_*_ÛŽ_*_ۏ_*_ې_*_Û‘_*_Û’_*_Û“_*_Û”_*_Û•_*_Û–_*_Û—_*_Û˜_*_Û™_*_Ûš_*_Û›_*_Ûœ_*_۝_*_Ûž_*_ÛŸ_*_Û _*_Û¡_*_Û¢_*_Û£_*_Û¤_*_Û¥_*_Û¦_*_Û§_*_Û¨_*_Û©_*_Ûª_*_Û«_*_Û¬_*_Û­_*_Û®_*_Û¯_*_Û°_*_Û±_*_Û²_*_Û³_*_Û´_*_Ûµ_*_Û¶_*_Û·_*_Û¸_*_Û¹_*_Ûº_*_Û»_*_Û¼_*_Û½_*_Û¾_*_Û¿'
5
+ # Initialize the encoder
6
+ def initialize
7
+ # UTF-8 bytes-sequences always begin with one of (0xD8 - 0xDB) for Arabic
8
+ @utf_8_beginning_chars = [*216..219].map(&:chr).join.force_encoding('cp1252')
10
9
 
11
- def initialize options = {}
12
- @replace_char = options[:replace_char] || '?'
10
+ # Misencoded sequences can be correctly re-encoded to utf-8, EXCEPT for one
11
+ # character which gets replaced with a space (ASCII for it: 32)!
12
+ @problem_char = 32.chr.force_encoding('cp1252')
13
13
 
14
- generate_mappings_hash
14
+ # The correct replacement for the problem character
15
+ @correct_char = 160.chr.force_encoding('cp1252')
15
16
  end
16
-
17
- def show_mappings_hash
18
- i = 0
19
- justification = 0;
20
- mappings = ''
21
- ar = AR.split('_*_')
22
-
23
- @map.each do |broken, good|
24
-
25
- str = "[#{broken.join(', ')}]"
26
-
27
- justification = str.length if str.length > justification
28
- str = str.ljust justification
29
-
30
- str += ' => '
31
- str += "[#{good.join(', ')}]"
32
- str += " # #{ar[i]}\n"
33
-
34
- mappings += str
35
-
36
- i += 1
37
- end
38
-
39
- puts mappings
40
- end
41
-
17
+
18
+ # Repairs Arabic (U+0600 - U+06FF) data
19
+ # which has been misencoded from cp1252 to UTF-8
20
+ # although the original data was UTF-8 encoded
21
+ #
22
+ # @param [String] Misencoded string
23
+ # @return [String] Correctly encoded utf-8 string
42
24
  def repair(str)
43
- @broken = str.unpack('C*')
44
- @repaired = []
45
-
46
- while @broken.length > 0
25
+
26
+ # Data buffers
27
+ source = str.clone
28
+ fixed = ""
29
+
30
+ # Each string needs a new converter instance
31
+ ec = Encoding::Converter.new('utf-8', 'cp1252')
32
+
33
+ until source.empty?
47
34
 
48
- # Try to use the mappings hash first and jump to the
49
- # next sequence if we succeed
50
- next if sequence_found_in_map
35
+ # Don't process correctly UTF-8
36
+ # encoded Arabic data
37
+ if is_arabic?(source[0])
38
+ fixed += source.shift!.force_encoding('cp1252')
39
+ next
40
+ end
51
41
 
52
- # Try to handle ASCII chars if they are not a part
53
- # of a broken sequence
54
- next if byte_not_broken
42
+ state = ec.primitive_convert(source, fixed, nil, nil, Encoding::Converter::AFTER_OUTPUT)
43
+
44
+ # When an undefined sequence is found, we only move the
45
+ # 2nd byte to the fixed data, as it will be valid in UTF-8.
46
+ # For example: 129 is undefined in cp1252, but it is in UTF-8.
47
+ # If we get a sequence like:
48
+ # ec.last_error.error_char.unpack('C*') # => [194, 129]
49
+ # We just ignore the 194 and add the 129 to the fixed data
50
+ if state == :undefined_conversion
51
+ c = ec.last_error.error_char.unpack('C*')[1].chr
52
+ fixed += c.force_encoding('cp1252')
53
+ end
55
54
 
56
- handle_unknown_byte
55
+ # After each byte gets converted, check for the problem character
56
+ # and replace it if it's found
57
+ if state == :after_output && ends_with_problem?(fixed)
58
+ fixed.gsub!(/#{@problem_char}$/, @correct_char)
59
+ end
57
60
  end
58
61
 
59
- result = @repaired.pack('C*')
60
- @repaired = []
61
- result
62
+ fixed.force_encoding('utf-8')
62
63
  end
63
64
 
64
65
  private
65
66
 
66
- def generate_mappings_hash
67
- @map = {}
68
- ar = AR.split('_*_')
69
-
70
- BROKEN_AR.split('_*_').each_with_index do |c, i|
71
- @map[ c.unpack('C*') ] = ar[i].unpack('C*')
72
- end
73
- end
74
-
75
- def sequence_found_in_map
76
- (1..4).each do |i|
77
- broken_seq = @broken[0..i]
78
- if @map.has_key?(broken_seq)
79
- @repaired += @map[broken_seq]
80
- @broken.slice!(0, i + 1)
81
- return true
82
- end
83
- end
84
- false
67
+ # Used to test for correctly encoded UTF-8 Arabic
68
+ #
69
+ # @param [String] Data to test
70
+ # @return [Boolean]
71
+ def is_arabic?(str)
72
+ str =~ %r{([\u0600-\u06FF])+}u
85
73
  end
86
74
 
87
- def byte_not_broken
88
- if (
89
- @broken.first != 195 && # The byte is not the beginning of a broken sequence
90
- @broken.first < 256 # One byte char
91
- ) || (
92
- @broken.first == 195 && # The byte is the beginning of a sequence ...
93
- !(152..155).include?(@broken[1]) # ... but the next one is not, so it's not a sequence!
94
- )
95
- # Add the byte to the repaired sequence and remove it from the broken one
96
- @repaired << @broken.shift
97
- end
98
- end
99
-
100
- def handle_unknown_byte
101
- case
102
- # Handle the case when the last 2 bytes are the beginning of
103
- # a broken sequence but it's not found in the mappings hash.
104
- # The best guess is that they're 2 one-byte chars.
105
- #
106
- # Handles: [195, (152 | 153 | 154 | 155)]
107
- when @broken.length == 2
108
- @repaired << @broken.first << @broken[1]
109
- @broken.slice!(0, 2)
110
-
111
- # If we are here, then we have no idea what is this byte!
112
- # We will just use the replace_char to replace the unknown byte_not_broken
113
- # in the repaired sequence
114
- else
115
- @repaired += @replace_char.bytes.to_a
116
- end
75
+ # Used to check for the problem char in the end
76
+ # of a given String
77
+ #
78
+ # @param [String] Data to test
79
+ # @return [Boolean]
80
+ def ends_with_problem?(str)
81
+ str =~ %r{[#{@utf_8_beginning_chars}]#{@problem_char}$}
117
82
  end
118
83
  end
@@ -1,4 +1,4 @@
1
1
  # encoding: UTF-8
2
2
  module Mosaheh
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
@@ -18,5 +18,10 @@ Gem::Specification.new do |s|
18
18
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
19
  s.require_paths = ["lib"]
20
20
 
21
- s.add_development_dependency "rspec"
21
+ s.add_development_dependency 'rspec'
22
+ s.add_development_dependency 'guard'
23
+ s.add_development_dependency 'guard-rspec'
24
+ # Linux notifications for guard
25
+ s.add_development_dependency 'rb-inotify'
26
+ s.add_development_dependency 'libnotify'
22
27
  end
@@ -0,0 +1,15 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path('../../../../spec_helper', __FILE__)
3
+
4
+ describe String, '#shift!' do
5
+ let(:string) { 'abc' }
6
+
7
+ it 'should remove the first charecter' do
8
+ string.shift!
9
+ string.should == 'bc'
10
+ end
11
+
12
+ it 'should return the removed charecter' do
13
+ string.shift!.should == 'a'
14
+ end
15
+ end
@@ -0,0 +1,25 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path('../../../spec_helper', __FILE__)
3
+
4
+ describe Mosaheh::Encoder do
5
+ let(:encoder) { Mosaheh::Encoder.new }
6
+
7
+ describe '#repair' do
8
+ it 'should repair the whole Arabic unicode codeblock' do
9
+ # Arabic: U+0600 - U+06FF
10
+ good_ar = (0..255).map{|i| eval '"\u06' + ("%02x" % i) + '"'}.join ' '
11
+ broken_ar = load_misencoded_ar
12
+ encoder.repair(broken_ar).should == good_ar
13
+ end
14
+
15
+ it 'should not change ASCII chars' do
16
+ text = [*0..127].map(&:chr).join
17
+ encoder.repair(text).should == text
18
+ end
19
+
20
+ it 'should not change correctly encoded Arabic chars' do
21
+ text = 'إختبار'
22
+ encoder.repair(text).should == text
23
+ end
24
+ end
25
+ end
@@ -0,0 +1 @@
1
+ ؀ ؁ ؂ ؃ ؄ ؅ ؆ ؇ ؈ ؉ ؊ ؋ ، ؍ ؎ ؏ ؐ ؑ ؒ ؓ ؔ ؕ ؖ ؗ ؘ ؙ ؚ ؛ ؜ ؝ ؞ ؟ ؠ ء آ أ ؤ إ ئ ا ب ة ت ث ج ح خ د ذ ر ز س ش ص ض ط ظ ع غ ػ ؼ ؽ ؾ ؿ ـ ف ق ك ل م ن ه و ى ي ً ٌ ٍ َ ُ ِ ّ ْ ٓ ٔ ٕ ٖ ٗ ٘ ٙ ٚ ٛ ٜ ٝ ٞ ٟ ٠ ١ ٢ ٣ ٤ ٥ ٦ ٧ ٨ ٩ ٪ ٫ ٬ ٭ ٮ ٯ ٰ ٱ ٲ ٳ ٴ ٵ ٶ ٷ ٸ ٹ ٺ ٻ ټ ٽ پ ٿ ڀ ځ ڂ ڃ ڄ څ چ ڇ ڈ ډ ڊ ڋ ڌ ڍ ڎ ڏ ڐ ڑ ڒ ړ ڔ ڕ ږ ڗ ژ ڙ ښ ڛ ڜ ڝ ڞ ڟ ڠ ڡ ڢ ڣ ڤ ڥ ڦ ڧ ڨ ک ڪ ګ ڬ ڭ ڮ گ ڰ ڱ ڲ ڳ ڴ ڵ ڶ ڷ ڸ ڹ ں ڻ ڼ ڽ ھ ڿ ۀ ہ ۂ ۃ ۄ ۅ ۆ ۇ ۈ ۉ ۊ ۋ ی ۍ ێ ۏ ې ۑ ے ۓ ۔ ە ۖ ۗ ۘ ۙ ۚ ۛ ۜ ۝ ۞ ۟ ۠ ۡ ۢ ۣ ۤ ۥ ۦ ۧ ۨ ۩ ۪ ۫ ۬ ۭ ۮ ۯ ۰ ۱ ۲ ۳ ۴ ۵ ۶ ۷ ۸ ۹ ۺ ۻ ۼ ۽ ۾ ۿ
@@ -7,3 +7,11 @@ path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
7
7
  $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
8
8
 
9
9
  require 'mosaheh'
10
+
11
+ def load_misencoded_sample(filename)
12
+ File.read("#{File.dirname(__FILE__)}/misencoded_samples/#{filename}").chomp
13
+ end
14
+
15
+ def load_misencoded_ar
16
+ load_misencoded_sample('ar.txt')
17
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mosaheh
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-31 00:00:00.000000000Z
12
+ date: 2011-11-05 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &21735460 !ruby/object:Gem::Requirement
16
+ requirement: &18315980 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,51 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *21735460
24
+ version_requirements: *18315980
25
+ - !ruby/object:Gem::Dependency
26
+ name: guard
27
+ requirement: &18315480 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *18315480
36
+ - !ruby/object:Gem::Dependency
37
+ name: guard-rspec
38
+ requirement: &18314980 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *18314980
47
+ - !ruby/object:Gem::Dependency
48
+ name: rb-inotify
49
+ requirement: &18314480 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *18314480
58
+ - !ruby/object:Gem::Dependency
59
+ name: libnotify
60
+ requirement: &18313920 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *18313920
25
69
  description: Mosaheh repairs UTF-8 Arabic (U+0600 - U+06FF) text which has been mistakenly
26
70
  saved as single-byte latin1 encoding (cp1252). The biggest usecase for it is to
27
71
  repair Arabic data stored in MySql databases with the wrong encoding.
@@ -33,14 +77,19 @@ extra_rdoc_files: []
33
77
  files:
34
78
  - .gitignore
35
79
  - Gemfile
80
+ - Guardfile
36
81
  - README.md
37
82
  - Rakefile
38
83
  - lib/mosaheh.rb
84
+ - lib/mosaheh/core_ext.rb
85
+ - lib/mosaheh/core_ext/string.rb
39
86
  - lib/mosaheh/encoder.rb
40
87
  - lib/mosaheh/version.rb
41
88
  - mosaheh.gemspec
42
89
  - spec/.rspec
43
- - spec/mosaheh/encoder_spec.rb
90
+ - spec/lib/mosaheh/core_ext/string_spec.rb
91
+ - spec/lib/mosaheh/encoder_spec.rb
92
+ - spec/misencoded_samples/ar.txt
44
93
  - spec/spec_helper.rb
45
94
  homepage: ''
46
95
  licenses: []
@@ -1,17 +0,0 @@
1
- # encoding: UTF-8
2
- require File.expand_path('../../spec_helper', __FILE__)
3
-
4
- describe Mosaheh::Encoder do
5
- let(:encoder) { Mosaheh::Encoder.new }
6
-
7
- describe '#repair' do
8
- it 'should repair the whole Arabic unicode codeblock' do
9
- encoder.repair(Mosaheh::Encoder::BROKEN_AR).unpack('C*').should eq Mosaheh::Encoder::AR.unpack('C*')
10
- end
11
-
12
- it 'should not change ASCII chars' do
13
- text = [*0..127].map(&:chr).join
14
- encoder.repair(text).should == text
15
- end
16
- end
17
- end