mosaheh 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ guard 'rspec', :cli => '--color --format nested' do
2
+ watch(%r{^spec/.+_spec\.rb$})
3
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
4
+ watch('spec/spec_helper.rb') { "spec" }
5
+ end
@@ -1,6 +1,8 @@
1
1
  # encoding: UTF-8
2
2
  require "mosaheh/version"
3
3
 
4
+ require 'mosaheh/core_ext'
5
+
4
6
  module Mosaheh
5
7
  autoload :Encoder, 'mosaheh/encoder'
6
8
  end
@@ -0,0 +1,3 @@
1
+ Dir["#{File.dirname(__FILE__)}/core_ext/*.rb"].each do |path|
2
+ require "mosaheh/core_ext/#{File.basename(path, '.rb')}"
3
+ end
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
+ class String
3
+
4
+ # Removes the first character from the String
5
+ # and returns it back
6
+ #
7
+ # @example
8
+ # str = 'abc'
9
+ # str.shift! # => 'a'
10
+ # p str # => 'bc'
11
+ # @return [String] The removed first character
12
+ def shift!
13
+ char = self[0]
14
+ self[0] = ''
15
+ char
16
+ end
17
+ end
@@ -1,118 +1,83 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  class Mosaheh::Encoder
4
-
5
- # Arabic UTF-8 Block, from U+0600 to U+06FF separated using '_*_'
6
- AR = '؀_*_؁_*_؂_*_؃_*_؄_*_؅_*_؆_*_؇_*_؈_*_؉_*_؊_*_؋_*_،_*_؍_*_؎_*_؏_*_ؐ_*_ؑ_*_ؒ_*_ؓ_*_ؔ_*_ؕ_*_ؖ_*_ؗ_*_ؘ_*_ؙ_*_ؚ_*_؛_*_؜_*_؝_*_؞_*_؟_*_ؠ_*_ء_*_آ_*_أ_*_ؤ_*_إ_*_ئ_*_ا_*_ب_*_ة_*_ت_*_ث_*_ج_*_ح_*_خ_*_د_*_ذ_*_ر_*_ز_*_س_*_ش_*_ص_*_ض_*_ط_*_ظ_*_ع_*_غ_*_ػ_*_ؼ_*_ؽ_*_ؾ_*_ؿ_*_ـ_*_ف_*_ق_*_ك_*_ل_*_م_*_ن_*_ه_*_و_*_ى_*_ي_*_ً_*_ٌ_*_ٍ_*_َ_*_ُ_*_ِ_*_ّ_*_ْ_*_ٓ_*_ٔ_*_ٕ_*_ٖ_*_ٗ_*_٘_*_ٙ_*_ٚ_*_ٛ_*_ٜ_*_ٝ_*_ٞ_*_ٟ_*_٠_*_١_*_٢_*_٣_*_٤_*_٥_*_٦_*_٧_*_٨_*_٩_*_٪_*_٫_*_٬_*_٭_*_ٮ_*_ٯ_*_ٰ_*_ٱ_*_ٲ_*_ٳ_*_ٴ_*_ٵ_*_ٶ_*_ٷ_*_ٸ_*_ٹ_*_ٺ_*_ٻ_*_ټ_*_ٽ_*_پ_*_ٿ_*_ڀ_*_ځ_*_ڂ_*_ڃ_*_ڄ_*_څ_*_چ_*_ڇ_*_ڈ_*_ډ_*_ڊ_*_ڋ_*_ڌ_*_ڍ_*_ڎ_*_ڏ_*_ڐ_*_ڑ_*_ڒ_*_ړ_*_ڔ_*_ڕ_*_ږ_*_ڗ_*_ژ_*_ڙ_*_ښ_*_ڛ_*_ڜ_*_ڝ_*_ڞ_*_ڟ_*_ڠ_*_ڡ_*_ڢ_*_ڣ_*_ڤ_*_ڥ_*_ڦ_*_ڧ_*_ڨ_*_ک_*_ڪ_*_ګ_*_ڬ_*_ڭ_*_ڮ_*_گ_*_ڰ_*_ڱ_*_ڲ_*_ڳ_*_ڴ_*_ڵ_*_ڶ_*_ڷ_*_ڸ_*_ڹ_*_ں_*_ڻ_*_ڼ_*_ڽ_*_ھ_*_ڿ_*_ۀ_*_ہ_*_ۂ_*_ۃ_*_ۄ_*_ۅ_*_ۆ_*_ۇ_*_ۈ_*_ۉ_*_ۊ_*_ۋ_*_ی_*_ۍ_*_ێ_*_ۏ_*_ې_*_ۑ_*_ے_*_ۓ_*_۔_*_ە_*_ۖ_*_ۗ_*_ۘ_*_ۙ_*_ۚ_*_ۛ_*_ۜ_*_۝_*_۞_*_۟_*_۠_*_ۡ_*_ۢ_*_ۣ_*_ۤ_*_ۥ_*_ۦ_*_ۧ_*_ۨ_*_۩_*_۪_*_۫_*_۬_*_ۭ_*_ۮ_*_ۯ_*_۰_*_۱_*_۲_*_۳_*_۴_*_۵_*_۶_*_۷_*_۸_*_۹_*_ۺ_*_ۻ_*_ۼ_*_۽_*_۾_*_ۿ'
7
4
 
8
- # U+0600 to U+06FF encoded using cp1252
9
- BROKEN_AR = 'Ø€_*_؁_*_Ø‚_*_؃_*_Ø„_*_Ø…_*_؆_*_؇_*_؈_*_؉_*_ØŠ_*_Ø‹_*_ØŒ_*_؍_*_ØŽ_*_؏_*_ؐ_*_Ø‘_*_Ø’_*_Ø“_*_Ø”_*_Ø•_*_Ø–_*_Ø—_*_ؘ_*_Ø™_*_Øš_*_Ø›_*_Øœ_*_؝_*_Øž_*_ØŸ_*_Ø _*_Ø¡_*_Ø¢_*_Ø£_*_ؤ_*_Ø¥_*_ئ_*_ا_*_ب_*_Ø©_*_ت_*_Ø«_*_ج_*_Ø­_*_Ø®_*_د_*_Ø°_*_ر_*_ز_*_س_*_Ø´_*_ص_*_ض_*_Ø·_*_ظ_*_ع_*_غ_*_Ø»_*_ؼ_*_ؽ_*_ؾ_*_Ø¿_*_Ù€_*_ف_*_Ù‚_*_Ùƒ_*_Ù„_*_Ù…_*_Ù†_*_Ù‡_*_Ùˆ_*_Ù‰_*_ÙŠ_*_Ù‹_*_ÙŒ_*_ٍ_*_ÙŽ_*_ُ_*_ِ_*_Ù‘_*_Ù’_*_Ù“_*_Ù”_*_Ù•_*_Ù–_*_Ù—_*_Ù˜_*_Ù™_*_Ùš_*_Ù›_*_Ùœ_*_ٝ_*_Ùž_*_ÙŸ_*_Ù _*_Ù¡_*_Ù¢_*_Ù£_*_Ù¤_*_Ù¥_*_Ù¦_*_Ù§_*_Ù¨_*_Ù©_*_Ùª_*_Ù«_*_Ù¬_*_Ù­_*_Ù®_*_Ù¯_*_Ù°_*_Ù±_*_Ù²_*_Ù³_*_Ù´_*_Ùµ_*_Ù¶_*_Ù·_*_Ù¸_*_Ù¹_*_Ùº_*_Ù»_*_Ù¼_*_Ù½_*_Ù¾_*_Ù¿_*_Ú€_*_ځ_*_Ú‚_*_Úƒ_*_Ú„_*_Ú…_*_Ú†_*_Ú‡_*_Úˆ_*_Ú‰_*_ÚŠ_*_Ú‹_*_ÚŒ_*_ڍ_*_ÚŽ_*_ڏ_*_ڐ_*_Ú‘_*_Ú’_*_Ú“_*_Ú”_*_Ú•_*_Ú–_*_Ú—_*_Ú˜_*_Ú™_*_Úš_*_Ú›_*_Úœ_*_ڝ_*_Úž_*_ÚŸ_*_Ú _*_Ú¡_*_Ú¢_*_Ú£_*_Ú¤_*_Ú¥_*_Ú¦_*_Ú§_*_Ú¨_*_Ú©_*_Úª_*_Ú«_*_Ú¬_*_Ú­_*_Ú®_*_Ú¯_*_Ú°_*_Ú±_*_Ú²_*_Ú³_*_Ú´_*_Úµ_*_Ú¶_*_Ú·_*_Ú¸_*_Ú¹_*_Úº_*_Ú»_*_Ú¼_*_Ú½_*_Ú¾_*_Ú¿_*_Û€_*_ہ_*_Û‚_*_Ûƒ_*_Û„_*_Û…_*_Û†_*_Û‡_*_Ûˆ_*_Û‰_*_ÛŠ_*_Û‹_*_ÛŒ_*_ۍ_*_ÛŽ_*_ۏ_*_ې_*_Û‘_*_Û’_*_Û“_*_Û”_*_Û•_*_Û–_*_Û—_*_Û˜_*_Û™_*_Ûš_*_Û›_*_Ûœ_*_۝_*_Ûž_*_ÛŸ_*_Û _*_Û¡_*_Û¢_*_Û£_*_Û¤_*_Û¥_*_Û¦_*_Û§_*_Û¨_*_Û©_*_Ûª_*_Û«_*_Û¬_*_Û­_*_Û®_*_Û¯_*_Û°_*_Û±_*_Û²_*_Û³_*_Û´_*_Ûµ_*_Û¶_*_Û·_*_Û¸_*_Û¹_*_Ûº_*_Û»_*_Û¼_*_Û½_*_Û¾_*_Û¿'
5
+ # Initialize the encoder
6
+ def initialize
7
+ # UTF-8 bytes-sequences always begin with one of (0xD8 - 0xDB) for Arabic
8
+ @utf_8_beginning_chars = [*216..219].map(&:chr).join.force_encoding('cp1252')
10
9
 
11
- def initialize options = {}
12
- @replace_char = options[:replace_char] || '?'
10
+ # Misencoded sequences can be correctly re-encoded to utf-8, EXCEPT for one
11
+ # character which gets replaced with a space (ASCII for it: 32)!
12
+ @problem_char = 32.chr.force_encoding('cp1252')
13
13
 
14
- generate_mappings_hash
14
+ # The correct replacement for the problem character
15
+ @correct_char = 160.chr.force_encoding('cp1252')
15
16
  end
16
-
17
- def show_mappings_hash
18
- i = 0
19
- justification = 0;
20
- mappings = ''
21
- ar = AR.split('_*_')
22
-
23
- @map.each do |broken, good|
24
-
25
- str = "[#{broken.join(', ')}]"
26
-
27
- justification = str.length if str.length > justification
28
- str = str.ljust justification
29
-
30
- str += ' => '
31
- str += "[#{good.join(', ')}]"
32
- str += " # #{ar[i]}\n"
33
-
34
- mappings += str
35
-
36
- i += 1
37
- end
38
-
39
- puts mappings
40
- end
41
-
17
+
18
+ # Repairs Arabic (U+0600 - U+06FF) data
19
+ # which has been misencoded from cp1252 to UTF-8
20
+ # although the original data was UTF-8 encoded
21
+ #
22
+ # @param [String] Misencoded string
23
+ # @return [String] Correctly encoded utf-8 string
42
24
  def repair(str)
43
- @broken = str.unpack('C*')
44
- @repaired = []
45
-
46
- while @broken.length > 0
25
+
26
+ # Data buffers
27
+ source = str.clone
28
+ fixed = ""
29
+
30
+ # Each string needs a new converter instance
31
+ ec = Encoding::Converter.new('utf-8', 'cp1252')
32
+
33
+ until source.empty?
47
34
 
48
- # Try to use the mappings hash first and jump to the
49
- # next sequence if we succeed
50
- next if sequence_found_in_map
35
+ # Don't process correctly UTF-8
36
+ # encoded Arabic data
37
+ if is_arabic?(source[0])
38
+ fixed += source.shift!.force_encoding('cp1252')
39
+ next
40
+ end
51
41
 
52
- # Try to handle ASCII chars if they are not a part
53
- # of a broken sequence
54
- next if byte_not_broken
42
+ state = ec.primitive_convert(source, fixed, nil, nil, Encoding::Converter::AFTER_OUTPUT)
43
+
44
+ # When an undefined sequence is found, we only move the
45
+ # 2nd byte to the fixed data, as it will be valid in UTF-8.
46
+ # For example: 129 is undefined in cp1252, but it is in UTF-8.
47
+ # If we get a sequence like:
48
+ # ec.last_error.error_char.unpack('C*') # => [194, 129]
49
+ # We just ignore the 194 and add the 129 to the fixed data
50
+ if state == :undefined_conversion
51
+ c = ec.last_error.error_char.unpack('C*')[1].chr
52
+ fixed += c.force_encoding('cp1252')
53
+ end
55
54
 
56
- handle_unknown_byte
55
+ # After each byte gets converted, check for the problem character
56
+ # and replace it if it's found
57
+ if state == :after_output && ends_with_problem?(fixed)
58
+ fixed.gsub!(/#{@problem_char}$/, @correct_char)
59
+ end
57
60
  end
58
61
 
59
- result = @repaired.pack('C*')
60
- @repaired = []
61
- result
62
+ fixed.force_encoding('utf-8')
62
63
  end
63
64
 
64
65
  private
65
66
 
66
- def generate_mappings_hash
67
- @map = {}
68
- ar = AR.split('_*_')
69
-
70
- BROKEN_AR.split('_*_').each_with_index do |c, i|
71
- @map[ c.unpack('C*') ] = ar[i].unpack('C*')
72
- end
73
- end
74
-
75
- def sequence_found_in_map
76
- (1..4).each do |i|
77
- broken_seq = @broken[0..i]
78
- if @map.has_key?(broken_seq)
79
- @repaired += @map[broken_seq]
80
- @broken.slice!(0, i + 1)
81
- return true
82
- end
83
- end
84
- false
67
+ # Used to test for correctly encoded UTF-8 Arabic
68
+ #
69
+ # @param [String] Data to test
70
+ # @return [Boolean]
71
+ def is_arabic?(str)
72
+ str =~ %r{([\u0600-\u06FF])+}u
85
73
  end
86
74
 
87
- def byte_not_broken
88
- if (
89
- @broken.first != 195 && # The byte is not the beginning of a broken sequence
90
- @broken.first < 256 # One byte char
91
- ) || (
92
- @broken.first == 195 && # The byte is the beginning of a sequence ...
93
- !(152..155).include?(@broken[1]) # ... but the next one is not, so it's not a sequence!
94
- )
95
- # Add the byte to the repaired sequence and remove it from the broken one
96
- @repaired << @broken.shift
97
- end
98
- end
99
-
100
- def handle_unknown_byte
101
- case
102
- # Handle the case when the last 2 bytes are the beginning of
103
- # a broken sequence but it's not found in the mappings hash.
104
- # The best guess is that they're 2 one-byte chars.
105
- #
106
- # Handles: [195, (152 | 153 | 154 | 155)]
107
- when @broken.length == 2
108
- @repaired << @broken.first << @broken[1]
109
- @broken.slice!(0, 2)
110
-
111
- # If we are here, then we have no idea what is this byte!
112
- # We will just use the replace_char to replace the unknown byte_not_broken
113
- # in the repaired sequence
114
- else
115
- @repaired += @replace_char.bytes.to_a
116
- end
75
+ # Used to check for the problem char in the end
76
+ # of a given String
77
+ #
78
+ # @param [String] Data to test
79
+ # @return [Boolean]
80
+ def ends_with_problem?(str)
81
+ str =~ %r{[#{@utf_8_beginning_chars}]#{@problem_char}$}
117
82
  end
118
83
  end
@@ -1,4 +1,4 @@
1
1
  # encoding: UTF-8
2
2
  module Mosaheh
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
@@ -18,5 +18,10 @@ Gem::Specification.new do |s|
18
18
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
19
  s.require_paths = ["lib"]
20
20
 
21
- s.add_development_dependency "rspec"
21
+ s.add_development_dependency 'rspec'
22
+ s.add_development_dependency 'guard'
23
+ s.add_development_dependency 'guard-rspec'
24
+ # Linux notifications for guard
25
+ s.add_development_dependency 'rb-inotify'
26
+ s.add_development_dependency 'libnotify'
22
27
  end
@@ -0,0 +1,15 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path('../../../../spec_helper', __FILE__)
3
+
4
+ describe String, '#shift!' do
5
+ let(:string) { 'abc' }
6
+
7
+ it 'should remove the first charecter' do
8
+ string.shift!
9
+ string.should == 'bc'
10
+ end
11
+
12
+ it 'should return the removed charecter' do
13
+ string.shift!.should == 'a'
14
+ end
15
+ end
@@ -0,0 +1,25 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path('../../../spec_helper', __FILE__)
3
+
4
+ describe Mosaheh::Encoder do
5
+ let(:encoder) { Mosaheh::Encoder.new }
6
+
7
+ describe '#repair' do
8
+ it 'should repair the whole Arabic unicode codeblock' do
9
+ # Arabic: U+0600 - U+06FF
10
+ good_ar = (0..255).map{|i| eval '"\u06' + ("%02x" % i) + '"'}.join ' '
11
+ broken_ar = load_misencoded_ar
12
+ encoder.repair(broken_ar).should == good_ar
13
+ end
14
+
15
+ it 'should not change ASCII chars' do
16
+ text = [*0..127].map(&:chr).join
17
+ encoder.repair(text).should == text
18
+ end
19
+
20
+ it 'should not change correctly encoded Arabic chars' do
21
+ text = 'إختبار'
22
+ encoder.repair(text).should == text
23
+ end
24
+ end
25
+ end
@@ -0,0 +1 @@
1
+ ؀ ؁ ؂ ؃ ؄ ؅ ؆ ؇ ؈ ؉ ؊ ؋ ، ؍ ؎ ؏ ؐ ؑ ؒ ؓ ؔ ؕ ؖ ؗ ؘ ؙ ؚ ؛ ؜ ؝ ؞ ؟ ؠ ء آ أ ؤ إ ئ ا ب ة ت ث ج ح خ د ذ ر ز س ش ص ض ط ظ ع غ ػ ؼ ؽ ؾ ؿ ـ ف ق ك ل م ن ه و ى ي ً ٌ ٍ َ ُ ِ ّ ْ ٓ ٔ ٕ ٖ ٗ ٘ ٙ ٚ ٛ ٜ ٝ ٞ ٟ ٠ ١ ٢ ٣ ٤ ٥ ٦ ٧ ٨ ٩ ٪ ٫ ٬ ٭ ٮ ٯ ٰ ٱ ٲ ٳ ٴ ٵ ٶ ٷ ٸ ٹ ٺ ٻ ټ ٽ پ ٿ ڀ ځ ڂ ڃ ڄ څ چ ڇ ڈ ډ ڊ ڋ ڌ ڍ ڎ ڏ ڐ ڑ ڒ ړ ڔ ڕ ږ ڗ ژ ڙ ښ ڛ ڜ ڝ ڞ ڟ ڠ ڡ ڢ ڣ ڤ ڥ ڦ ڧ ڨ ک ڪ ګ ڬ ڭ ڮ گ ڰ ڱ ڲ ڳ ڴ ڵ ڶ ڷ ڸ ڹ ں ڻ ڼ ڽ ھ ڿ ۀ ہ ۂ ۃ ۄ ۅ ۆ ۇ ۈ ۉ ۊ ۋ ی ۍ ێ ۏ ې ۑ ے ۓ ۔ ە ۖ ۗ ۘ ۙ ۚ ۛ ۜ ۝ ۞ ۟ ۠ ۡ ۢ ۣ ۤ ۥ ۦ ۧ ۨ ۩ ۪ ۫ ۬ ۭ ۮ ۯ ۰ ۱ ۲ ۳ ۴ ۵ ۶ ۷ ۸ ۹ ۺ ۻ ۼ ۽ ۾ ۿ
@@ -7,3 +7,11 @@ path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
7
7
  $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
8
8
 
9
9
  require 'mosaheh'
10
+
11
+ def load_misencoded_sample(filename)
12
+ File.read("#{File.dirname(__FILE__)}/misencoded_samples/#{filename}").chomp
13
+ end
14
+
15
+ def load_misencoded_ar
16
+ load_misencoded_sample('ar.txt')
17
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mosaheh
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-31 00:00:00.000000000Z
12
+ date: 2011-11-05 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &21735460 !ruby/object:Gem::Requirement
16
+ requirement: &18315980 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,51 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *21735460
24
+ version_requirements: *18315980
25
+ - !ruby/object:Gem::Dependency
26
+ name: guard
27
+ requirement: &18315480 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *18315480
36
+ - !ruby/object:Gem::Dependency
37
+ name: guard-rspec
38
+ requirement: &18314980 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *18314980
47
+ - !ruby/object:Gem::Dependency
48
+ name: rb-inotify
49
+ requirement: &18314480 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *18314480
58
+ - !ruby/object:Gem::Dependency
59
+ name: libnotify
60
+ requirement: &18313920 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *18313920
25
69
  description: Mosaheh repairs UTF-8 Arabic (U+0600 - U+06FF) text which has been mistakenly
26
70
  saved as single-byte latin1 encoding (cp1252). The biggest usecase for it is to
27
71
  repair Arabic data stored in MySql databases with the wrong encoding.
@@ -33,14 +77,19 @@ extra_rdoc_files: []
33
77
  files:
34
78
  - .gitignore
35
79
  - Gemfile
80
+ - Guardfile
36
81
  - README.md
37
82
  - Rakefile
38
83
  - lib/mosaheh.rb
84
+ - lib/mosaheh/core_ext.rb
85
+ - lib/mosaheh/core_ext/string.rb
39
86
  - lib/mosaheh/encoder.rb
40
87
  - lib/mosaheh/version.rb
41
88
  - mosaheh.gemspec
42
89
  - spec/.rspec
43
- - spec/mosaheh/encoder_spec.rb
90
+ - spec/lib/mosaheh/core_ext/string_spec.rb
91
+ - spec/lib/mosaheh/encoder_spec.rb
92
+ - spec/misencoded_samples/ar.txt
44
93
  - spec/spec_helper.rb
45
94
  homepage: ''
46
95
  licenses: []
@@ -1,17 +0,0 @@
1
- # encoding: UTF-8
2
- require File.expand_path('../../spec_helper', __FILE__)
3
-
4
- describe Mosaheh::Encoder do
5
- let(:encoder) { Mosaheh::Encoder.new }
6
-
7
- describe '#repair' do
8
- it 'should repair the whole Arabic unicode codeblock' do
9
- encoder.repair(Mosaheh::Encoder::BROKEN_AR).unpack('C*').should eq Mosaheh::Encoder::AR.unpack('C*')
10
- end
11
-
12
- it 'should not change ASCII chars' do
13
- text = [*0..127].map(&:chr).join
14
- encoder.repair(text).should == text
15
- end
16
- end
17
- end