arabic-letter-connector 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
*.gem
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Gem specification for arabic-letter-connector.

# Put the gem's own lib/ directory at the front of the load path so the
# version file can be required straight from the source tree.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'lib'))

require 'arabic-letter-connector/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'arabic-letter-connector'
  spec.version     = ArabicLetterConnector::VERSION
  spec.date        = '2013-05-29'

  # Human-readable description
  spec.summary     = 'Arabic Letter Connector'
  spec.description = 'A tool to replace generic disconnected Arabic letters with their connected counterparts.'

  # Ownership and contact
  spec.authors     = ['Sinan Taifour', 'Ahmed Nasser']
  spec.email       = 'sinan@taifour.com'
  spec.homepage    = 'http://github.com/staii/arabic-letter-connector'

  # Package every file tracked by git; code is loaded from lib/.
  spec.files         = %x(git ls-files).split("\n")
  spec.require_paths = ['lib']
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# Replaces generic Arabic letters in a string with the positional glyph
# (isolated, initial, medial or final) that matches their neighbours.
# The glyph code points registered below are in the U+FE80..U+FEFC range
# (plus U+0640, which maps to itself).
module ArabicLetterConnector

  # Lazily built table mapping a generic Arabic character to its
  # CharacterInfo. Populated on the first call to charinfos.
  @@charinfos = nil

  # The generic character, its four positional glyphs, and whether the letter
  # connects to the character that follows it.
  class CharacterInfo

    attr_accessor :common, :formatted

    # common    - the generic character (a one-character String).
    # isolated, final, initial, medial - the glyph String for each position.
    # connects  - true if the letter joins to the following character.
    def initialize(common, isolated, final, initial, medial, connects)
      @common = common
      @formatted = {
        :isolated => isolated,
        :final    => final,
        :initial  => initial,
        :medial   => medial,
      }
      @connects = connects
    end

    # Whether this letter connects to the character that follows it.
    def connects?
      @connects
    end

  end

  # Determine the form of the current character (:isolated, :initial, :medial,
  # or :final), given the previous character and the next one. In Arabic, all
  # characters can connect with a previous character, but not all letters can
  # connect with the next character (this is determined by
  # CharacterInfo#connects?).
  #
  # previous_char - the character before the current one, or nil.
  # next_char     - the character after the current one, or nil.
  #
  # The current character itself is not consulted: for a letter that does not
  # connect forward, the table stores its final glyph under :medial and its
  # isolated glyph under :initial, so the lookup still yields the right glyph.
  #
  # Returns a Symbol.
  def self.determine_form(previous_char, next_char)
    table = charinfos
    previous_info = table[previous_char]

    # Joined on the right only if the previous character is a known letter
    # that connects forward; joined on the left if the next one is known.
    joined_to_previous = previous_info && previous_info.connects?
    joined_to_next = table.key?(next_char)

    if joined_to_next
      joined_to_previous ? :medial : :initial
    else
      joined_to_previous ? :final : :isolated
    end
  end

  # Return a copy of str in which every character found in the table is
  # replaced by the glyph for its position; all other characters are copied
  # through unchanged.
  #
  # str - the String to transform.
  #
  # Returns a new String.
  def self.transform(str)
    table = charinfos
    chars = str.each_char.to_a
    result = ''.dup # mutable buffer; appended to in place

    chars.each_with_index do |char, index|
      info = table[char]
      if info
        # Guard index 0 explicitly: chars[-1] would wrap around to the last
        # character instead of meaning "no previous character".
        previous_char = index.zero? ? nil : chars[index - 1]
        next_char = chars[index + 1] # nil past the end of the string
        result << info.formatted[determine_form(previous_char, next_char)]
      else
        result << char
      end
    end

    result
  end

  # NOTE: the original source placed a bare `private` here, which does not
  # affect methods defined with `def self.`. The helpers below have therefore
  # always been publicly callable and are deliberately left that way.

  # Return the character table, building it on first use.
  #
  # Returns a Hash of generic character (String) => CharacterInfo.
  def self.charinfos
    return @@charinfos unless @@charinfos.nil?
    @@charinfos = {}
    #   common  isolated final   initial medial  connects
    add("0627", "fe8d", "fe8e", "fe8d", "fe8e", false) # Alef
    add("0628", "fe8f", "fe90", "fe91", "fe92", true)  # Ba2
    add("062a", "fe95", "fe96", "fe97", "fe98", true)  # Ta2
    add("062b", "fe99", "fe9a", "fe9b", "fe9c", true)  # Tha2
    add("062c", "fe9d", "fe9e", "fe9f", "fea0", true)  # Jeem
    add("062d", "fea1", "fea2", "fea3", "fea4", true)  # 7a2
    add("062e", "fea5", "fea6", "fea7", "fea8", true)  # 7'a2
    add("062f", "fea9", "feaa", "fea9", "feaa", false) # Dal
    add("0630", "feab", "feac", "feab", "feac", false) # Thal
    add("0631", "fead", "feae", "fead", "feae", false) # Ra2
    add("0632", "feaf", "feb0", "feaf", "feb0", false) # Zain
    add("0633", "feb1", "feb2", "feb3", "feb4", true)  # Seen
    add("0634", "feb5", "feb6", "feb7", "feb8", true)  # Sheen
    add("0635", "feb9", "feba", "febb", "febc", true)  # 9ad
    add("0636", "febd", "febe", "febf", "fec0", true)  # 9'ad
    add("0637", "fec1", "fec2", "fec3", "fec4", true)  # 6a2
    add("0638", "fec5", "fec6", "fec7", "fec8", true)  # 6'a2
    add("0639", "fec9", "feca", "fecb", "fecc", true)  # 3ain
    add("063a", "fecd", "fece", "fecf", "fed0", true)  # 3'ain
    add("0641", "fed1", "fed2", "fed3", "fed4", true)  # Fa2
    add("0642", "fed5", "fed6", "fed7", "fed8", true)  # Qaf
    add("0643", "fed9", "feda", "fedb", "fedc", true)  # Kaf
    add("0644", "fedd", "fede", "fedf", "fee0", true)  # Lam
    add("0645", "fee1", "fee2", "fee3", "fee4", true)  # Meem
    add("0646", "fee5", "fee6", "fee7", "fee8", true)  # Noon
    add("0647", "fee9", "feea", "feeb", "feec", true)  # Ha2
    add("0648", "feed", "feee", "feed", "feee", false) # Waw
    add("064a", "fef1", "fef2", "fef3", "fef4", true)  # Ya2
    add("0621", "fe80", "fe80", "fe80", "fe80", false) # Hamza
    add("0622", "fe81", "fe82", "fe81", "fe82", false) # Alef Madda
    add("0623", "fe83", "fe84", "fe83", "fe84", false) # Alef Hamza Above
    add("0624", "fe85", "fe86", "fe85", "fe86", false) # Waw Hamza
    add("0625", "fe87", "fe88", "fe87", "fe88", false) # Alef Hamza Below
    add("0626", "fe89", "fe8a", "fe8b", "fe8c", true)  # Ya2 Hamza
    add("0629", "fe93", "fe94", "fe93", "fe94", false) # Ta2 Marbu6a
    add("0640", "0640", "0640", "0640", "0640", true)  # Tatweel
    add("0649", "feef", "fef0", "feef", "fef0", false) # Alef Layyina
    @@charinfos
  end

  # Register one letter in the table.
  #
  # common, isolated, final, initial, medial - code points written as
  #   hexadecimal Strings (e.g. "0628"); each is converted to a one-character
  #   UTF-8 String.
  # connects - true if the letter joins to the following character.
  #
  # Writes directly into @@charinfos, so the table must already be a Hash
  # (charinfos initialises it before calling this).
  #
  # Returns the CharacterInfo that was stored.
  def self.add(common, isolated, final, initial, medial, connects)
    glyphs = [common, isolated, final, initial, medial].map do |code_point|
      [code_point.hex].pack("U")
    end
    charinfo = CharacterInfo.new(*glyphs, connects)
    @@charinfos[charinfo.common] = charinfo
  end

end
|
metadata
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: arabic-letter-connector
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Sinan Taifour
|
9
|
+
- Ahmed Nasser
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2013-05-29 00:00:00.000000000 Z
|
14
|
+
dependencies: []
|
15
|
+
description: A tool to replace generic disconnected Arabic letters with their connected
|
16
|
+
counterparts.
|
17
|
+
email: sinan@taifour.com
|
18
|
+
executables: []
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- .gitignore
|
23
|
+
- arabic-letter-connector.gemspec
|
24
|
+
- lib/arabic-letter-connector.rb
|
25
|
+
- lib/arabic-letter-connector/logic.rb
|
26
|
+
- lib/arabic-letter-connector/string.rb
|
27
|
+
- lib/arabic-letter-connector/version.rb
|
28
|
+
homepage: http://github.com/staii/arabic-letter-connector
|
29
|
+
licenses: []
|
30
|
+
post_install_message:
|
31
|
+
rdoc_options: []
|
32
|
+
require_paths:
|
33
|
+
- lib
|
34
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
+
none: false
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
requirements: []
|
47
|
+
rubyforge_project:
|
48
|
+
rubygems_version: 1.8.24
|
49
|
+
signing_key:
|
50
|
+
specification_version: 3
|
51
|
+
summary: Arabic Letter Connector
|
52
|
+
test_files: []
|