arabic-letter-connector 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1 @@
1
+ *.gem
@@ -0,0 +1,19 @@
1
# Gem specification for arabic-letter-connector.

# Put this checkout's lib/ directory at the front of the load path so the
# version file required below resolves to the local copy.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'lib'))

require 'arabic-letter-connector/version'

Gem::Specification.new do |spec|

  # Identity.
  spec.name          = 'arabic-letter-connector'
  spec.version       = ArabicLetterConnector::VERSION
  spec.date          = '2013-05-29'

  # Human-readable description.
  spec.summary       = 'Arabic Letter Connector'
  spec.description   = 'A tool to replace generic disconnected Arabic letters with their connected counterparts.'

  # Authorship and project home.
  spec.authors       = ["Sinan Taifour", "Ahmed Nasser"]
  spec.email         = 'sinan@taifour.com'
  spec.homepage      = 'http://github.com/staii/arabic-letter-connector'

  # Package every file tracked by git (one path per output line); the library
  # code is loaded from lib/.
  spec.files         = %x(git ls-files).split("\n")
  spec.require_paths = ['lib']

end
@@ -0,0 +1,3 @@
1
+ require 'arabic-letter-connector/version'
2
+ require 'arabic-letter-connector/logic'
3
+ require 'arabic-letter-connector/string'
@@ -0,0 +1,126 @@
1
# Replaces generic Arabic letters (U+0621..U+064A) with the positional
# presentation forms (isolated, initial, medial or final) listed in the table
# built by ArabicLetterConnector.charinfos, so that text renders connected in
# environments that do not perform Arabic shaping themselves.
module ArabicLetterConnector

  # Lookup table mapping a generic letter (a one-character String) to its
  # CharacterInfo. Built lazily on the first call to .charinfos.
  @charinfos = nil

  # Code points of Arabic combining marks (harakat and similar signs). They sit
  # on top of a letter and must not interrupt the connection between the
  # letters on either side of them.
  TRANSPARENT_CODEPOINTS = [
    (0x0610..0x061a), (0x064b..0x065f), (0x0670..0x0670),
    (0x06d6..0x06dc), (0x06df..0x06e4), (0x06e7..0x06e8), (0x06ea..0x06ed)
  ].map { |range| range.to_a }.flatten.freeze

  # The same marks as one-character strings, keyed for constant-time lookup.
  TRANSPARENT_CHARS = TRANSPARENT_CODEPOINTS.inject({}) do |marks, codepoint|
    marks[[codepoint].pack("U")] = true
    marks
  end.freeze

  # One Arabic letter: its generic character, its four positional forms and
  # whether it connects to the letter that follows it.
  class CharacterInfo

    attr_accessor :common, :formatted

    # common   - the generic (unshaped) character.
    # isolated, final, initial, medial - the character to emit for each form.
    # connects - true when the letter joins to the following letter.
    def initialize(common, isolated, final, initial, medial, connects)
      @common = common
      @formatted = {
        :isolated => isolated,
        :final    => final,
        :initial  => initial,
        :medial   => medial,
      }
      @connects = connects
    end

    # Whether this letter joins to the letter that follows it.
    def connects?
      @connects
    end

  end

  # Determine the form of the current character (:isolated, :initial, :medial
  # or :final), given the previous character and the next one. In Arabic, all
  # characters can connect with a previous character, but not all letters can
  # connect with the next character (this is determined by
  # CharacterInfo#connects?).
  #
  # The current character itself is not inspected here: for a letter that does
  # not connect forward, the table maps its :initial form to its isolated
  # glyph and its :medial form to its final glyph.
  #
  # previous_char, next_char - neighbouring characters, or nil at either end
  #                            of the text.
  #
  # Returns a Symbol.
  def self.determine_form(previous_char, next_char)
    table = charinfos
    previous_info = table[previous_char]
    joined_to_previous = previous_info && previous_info.connects?
    if table.key?(next_char)
      joined_to_previous ? :medial : :initial
    else
      # The next character is not an Arabic letter (or there is none).
      joined_to_previous ? :final : :isolated
    end
  end

  # Return a copy of str in which every letter known to .charinfos is replaced
  # by the presentation form matching its position. All other characters are
  # copied unchanged. Combining marks are copied unchanged too, but are skipped
  # when looking for a letter's neighbours, so a vowelled word stays connected.
  #
  # str - the String to transform.
  #
  # Returns a new String.
  def self.transform(str)
    table = charinfos
    chars = str.each_char.to_a
    result = "".dup
    previous_char = nil # nearest preceding character that is not a combining mark
    chars.each_with_index do |char, index|
      if table.key?(char)
        form = determine_form(previous_char, following_char(chars, index + 1))
        result << table[char].formatted[form]
      else
        result << char
      end
      previous_char = char unless transparent?(char)
    end
    result
  end

  # Return the letter table, building it on first use.
  #
  # Note: a bare `private` does not apply to `def self.` methods, so .charinfos
  # and .add have always been publicly callable; they are kept public so that
  # existing callers keep working.
  def self.charinfos
    return @charinfos if @charinfos
    @charinfos = {}
    add("0627", "fe8d", "fe8e", "fe8d", "fe8e", false) # Alef
    add("0628", "fe8f", "fe90", "fe91", "fe92", true)  # Ba2
    add("062a", "fe95", "fe96", "fe97", "fe98", true)  # Ta2
    add("062b", "fe99", "fe9a", "fe9b", "fe9c", true)  # Tha2
    add("062c", "fe9d", "fe9e", "fe9f", "fea0", true)  # Jeem
    add("062d", "fea1", "fea2", "fea3", "fea4", true)  # 7a2
    add("062e", "fea5", "fea6", "fea7", "fea8", true)  # 7'a2
    add("062f", "fea9", "feaa", "fea9", "feaa", false) # Dal
    add("0630", "feab", "feac", "feab", "feac", false) # Thal
    add("0631", "fead", "feae", "fead", "feae", false) # Ra2
    add("0632", "feaf", "feb0", "feaf", "feb0", false) # Zain
    add("0633", "feb1", "feb2", "feb3", "feb4", true)  # Seen
    add("0634", "feb5", "feb6", "feb7", "feb8", true)  # Sheen
    add("0635", "feb9", "feba", "febb", "febc", true)  # 9ad
    add("0636", "febd", "febe", "febf", "fec0", true)  # 9'ad
    add("0637", "fec1", "fec2", "fec3", "fec4", true)  # 6a2
    add("0638", "fec5", "fec6", "fec7", "fec8", true)  # 6'a2
    add("0639", "fec9", "feca", "fecb", "fecc", true)  # 3ain
    add("063a", "fecd", "fece", "fecf", "fed0", true)  # 3'ain
    add("0641", "fed1", "fed2", "fed3", "fed4", true)  # Fa2
    add("0642", "fed5", "fed6", "fed7", "fed8", true)  # Qaf
    add("0643", "fed9", "feda", "fedb", "fedc", true)  # Kaf
    add("0644", "fedd", "fede", "fedf", "fee0", true)  # Lam
    add("0645", "fee1", "fee2", "fee3", "fee4", true)  # Meem
    add("0646", "fee5", "fee6", "fee7", "fee8", true)  # Noon
    add("0647", "fee9", "feea", "feeb", "feec", true)  # Ha2
    add("0648", "feed", "feee", "feed", "feee", false) # Waw
    add("064a", "fef1", "fef2", "fef3", "fef4", true)  # Ya2
    add("0621", "fe80", "fe80", "fe80", "fe80", false) # Hamza
    add("0622", "fe81", "fe82", "fe81", "fe82", false) # Alef Madda
    add("0623", "fe83", "fe84", "fe83", "fe84", false) # Alef Hamza Above
    add("0624", "fe85", "fe86", "fe85", "fe86", false) # Waw Hamza
    add("0625", "fe87", "fe88", "fe87", "fe88", false) # Alef Hamza Below
    add("0626", "fe89", "fe8a", "fe8b", "fe8c", true)  # Ya2 Hamza
    add("0629", "fe93", "fe94", "fe93", "fe94", false) # Ta2 Marbu6a
    add("0640", "0640", "0640", "0640", "0640", true)  # Tatweel
    add("0649", "feef", "fef0", "feef", "fef0", false) # Alef Layyina
    @charinfos
  end

  # Register one letter in the table. Each of the first five arguments is a
  # code point written as a hexadecimal String; connects is a boolean saying
  # whether the letter joins to the following one.
  #
  # Goes through .charinfos rather than the raw variable so that calling .add
  # before the table has been built no longer fails on nil.
  def self.add(common, isolated, final, initial, medial, connects)
    glyphs = [common, isolated, final, initial, medial].map do |hex_codepoint|
      [hex_codepoint.hex].pack("U")
    end
    charinfo = CharacterInfo.new(*(glyphs + [connects]))
    charinfos[charinfo.common] = charinfo
  end

  # True when char is a combining mark that must not break a connection.
  def self.transparent?(char)
    TRANSPARENT_CHARS.key?(char)
  end
  private_class_method :transparent?

  # First character at or after index start that is not a combining mark, or
  # nil when the text ends first.
  def self.following_char(chars, start)
    index = start
    index += 1 while index < chars.length && transparent?(chars[index])
    chars[index]
  end
  private_class_method :following_char

end
@@ -0,0 +1,5 @@
1
# Core extension that exposes the connector directly on String instances.
class String

  # Hands this string to ArabicLetterConnector.transform and returns whatever
  # that call returns.
  def connect_arabic_letters
    connector = ArabicLetterConnector
    connector.transform(self)
  end

end
@@ -0,0 +1,3 @@
1
module ArabicLetterConnector
  # Version of the gem, read by the gemspec. Frozen so that the shared
  # constant cannot be mutated in place by a caller.
  VERSION = "0.1.1".freeze
end
metadata ADDED
@@ -0,0 +1,52 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: arabic-letter-connector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Sinan Taifour
9
+ - Ahmed Nasser
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2013-05-29 00:00:00.000000000 Z
14
+ dependencies: []
15
+ description: A tool to replace generic disconnected Arabic letters with their connected
16
+ counterparts.
17
+ email: sinan@taifour.com
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .gitignore
23
+ - arabic-letter-connector.gemspec
24
+ - lib/arabic-letter-connector.rb
25
+ - lib/arabic-letter-connector/logic.rb
26
+ - lib/arabic-letter-connector/string.rb
27
+ - lib/arabic-letter-connector/version.rb
28
+ homepage: http://github.com/staii/arabic-letter-connector
29
+ licenses: []
30
+ post_install_message:
31
+ rdoc_options: []
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ required_rubygems_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ requirements: []
47
+ rubyforge_project:
48
+ rubygems_version: 1.8.24
49
+ signing_key:
50
+ specification_version: 3
51
+ summary: Arabic Letter Connector
52
+ test_files: []