RubyGems - mosta-raramorph - Versions diffs - 0.1.0 - Mend

mosta-raramorph 0.1.0

Files changed (23) hide show

data/README +56 -0
data/bin/raramorph +6 -0
data/lib/dictionaries/dictPrefixes +421 -0
data/lib/dictionaries/dictStems +135989 -0
data/lib/dictionaries/dictSuffixes +1170 -0
data/lib/dictionaries/marshal_stems +0 -0
data/lib/dictionaries/tableAB +2276 -0
data/lib/dictionaries/tableAC +743 -0
data/lib/dictionaries/tableBC +1584 -0
data/lib/raramorph/arabic_latin_translator.rb +38 -0
data/lib/raramorph/dictionary_entry.rb +40 -0
data/lib/raramorph/in_memory_dictionary_handler.rb +325 -0
data/lib/raramorph/in_memory_solutions_handler.rb +78 -0
data/lib/raramorph/latin_arabic_translator.rb +35 -0
data/lib/raramorph/logger.rb +20 -0
data/lib/raramorph/raramorph.rb +417 -0
data/lib/raramorph/solution.rb +592 -0
data/lib/raramorph/translator.rb +40 -0
data/lib/raramorph.rb +16 -0
data/lib/raramorph_main.rb +34 -0
data/lib/test_input/UTF-8.txt +32 -0
data/raramorph.gemspec +42 -0
metadata +75 -0

data/lib/raramorph/solution.rb ADDED Viewed

@@ -0,0 +1,592 @@
+# A class to find the solution of the word
+# Author:: eSpace technologies  www.eSpace.com.eg
+# Copyright:: 2008
+class Solution
+  attr_reader :prefix, :stem, :suffix, :cnt
+  @@ends_with_set_for_pos_one = Set.new(["CONJ","EMPHATIC_PARTICLE","FUNC_WORD",
+                "FUT_PART","INTERJ","INTERROG_PART","IV1S","IV2MS",
+                "IV2FS","IV3MS","IV3FS","IV2D","IV2FD","IV3MD","IV3FD",
+                "IV1P","IV2MP","IV2FP","IV3MP","IV3FP","NEG_PART",
+                "PREP","RESULT_CLAUSE_PARTICLE"])
+  @@ends_with_set_for_pos_two = Set.new(["CASE_INDEF_NOM","CASE_INDEF_ACC",
+                "CASE_INDEF_ACCGEN","CASE_INDEF_GEN" ,"CASE_DEF_NOM" ,
+                "CASE_DEF_ACC" ,"CASE_DEF_ACCGEN","CASE_DEF_GEN" ,
+                "NSUFF_MASC_SG_ACC_INDEF" ,"NSUFF_FEM_SG" ,"NSUFF_MASC_DU_NOM" ,
+                "NSUFF_MASC_DU_NOM_POSS" ,"NSUFF_MASC_DU_ACCGEN" ,
+                "NSUFF_MASC_DU_ACCGEN_POSS" ,"NSUFF_FEM_DU_NOM" ,
+                "NSUFF_FEM_DU_NOM_POSS" ,"NSUFF_FEM_DU_ACCGEN" ,
+                "NSUFF_FEM_DU_ACCGEN_POSS" ,"NSUFF_MASC_PL_NOM" ,
+                "NSUFF_MASC_PL_NOM_POSS"  ,"NSUFF_MASC_PL_ACCGEN" ,
+                "NSUFF_MASC_PL_ACCGEN_POSS" ,"NSUFF_FEM_PL" ,"POSS_PRON_1S",
+                "POSS_PRON_2MS" ,"POSS_PRON_2FS" ,"POSS_PRON_3MS"  ,
+                "POSS_PRON_3FS","POSS_PRON_2D" ,"POSS_PRON_3D" ,"POSS_PRON_1P",
+                "POSS_PRON_2MP" ,"POSS_PRON_2FP" ,"POSS_PRON_3MP" ,"POSS_PRON_3FP" ,
+                "IVSUFF_DO:1S" ,"IVSUFF_DO:2MS" ,"IVSUFF_DO:2FS" ,"IVSUFF_DO:3MS" ,
+                "IVSUFF_DO:3FS" ,"IVSUFF_DO:2D" ,"IVSUFF_DO:3D" ,"IVSUFF_DO:1P" ,
+                "IVSUFF_DO:2MP" ,"IVSUFF_DO:2FP" ,"IVSUFF_DO:3MP" ,"IVSUFF_DO:3FP" ,
+                "IVSUFF_MOOD:I" ,"IVSUFF_SUBJ:2FS_MOOD:I" ,"IVSUFF_SUBJ:D_MOOD:I" ,
+                "IVSUFF_SUBJ:3D_MOOD:I" ,"IVSUFF_SUBJ:MP_MOOD:I" ,"IVSUFF_MOOD:S",
+                "IVSUFF_SUBJ:2FS_MOOD:SJ" ,"IVSUFF_SUBJ:D_MOOD:SJ","IVSUFF_SUBJ:MP_MOOD:SJ" ,
+                "IVSUFF_SUBJ:3MP_MOOD:SJ" ,"IVSUFF_SUBJ:FP" ,"PVSUFF_DO:1S" ,"PVSUFF_DO:2MS" ,
+                "PVSUFF_DO:2FS" ,"PVSUFF_DO:3MS" ,"PVSUFF_DO:3FS" ,"PVSUFF_DO:2D" ,
+                "PVSUFF_DO:3D" ,"PVSUFF_DO:1P" ,"PVSUFF_DO:2MP" ,"PVSUFF_DO:2FP" ,
+                "PVSUFF_DO:3MP" ,"PVSUFF_DO:3FP" ,"PVSUFF_SUBJ:1S" ,"PVSUFF_SUBJ:2MS" ,
+                "PVSUFF_SUBJ:2FS" ,"PVSUFF_SUBJ:3MS" ,"PVSUFF_SUBJ:3FS" ,"PVSUFF_SUBJ:2MD" ,
+                "PVSUFF_SUBJ:2FD" ,"PVSUFF_SUBJ:3MD" ,"PVSUFF_SUBJ:3FD" ,"PVSUFF_SUBJ:1P" ,
+                "PVSUFF_SUBJ:2MP" ,"PVSUFF_SUBJ:2FP" ,"PVSUFF_SUBJ:3MP" ,"PVSUFF_SUBJ:3FP" ,
+                "CVSUFF_DO:1S" ,"CVSUFF_DO:3MS" ,"CVSUFF_DO:3FS" ,"CVSUFF_DO:3D" ,
+                "CVSUFF_DO:1P" ,"CVSUFF_DO:3MP" ,"CVSUFF_DO:3FP" ,"CVSUFF_SUBJ:2MS" ,
+                "CVSUFF_SUBJ:2FS" ,"CVSUFF_SUBJ:2MP"])
+  protected
+  # Constructs a solution for a word. Note that the prefix, stem and suffix combination is <b>recomputed</b>
+  #and may not necessarily match with the information provided by the dictionaries.
+  # * [debug] Whether or not the dictionnaries inconsistencies should be output
+  # * [cnt] Order in sequence ; not very useful actually
+  # * [prefix The prefix as provided by the prefixes dictionnary
+  # * [stem] The stem as provided by the stems dictionnary
+  # * [suffix] The suffix as provided by the suffixes dictionnary
+  #
+  def initialize(debug, cnt, prefix, stem, suffix)
+      # Whether or not the dictionnaries inconsistencies should be output
+      @debug = debug;
+      # The order in solutions' sequence.
+      @cnt = cnt;
+      # The dictionary entry of the prefix.
+      @prefix = prefix;
+      # The dictionary entry of the stem.
+      @stem = stem;
+      # The dictionary entry of the suffix.
+      @suffix = suffix;
+      # The prefixes POS.
+      @prefixesPOS = prefix.pos
+      #The stems POS.
+      @stemsPOS = stem.pos
+      #The suffixes POS.
+      @suffixesPOS = suffix.pos
+      #The prefixes glosses.
+      @prefixesGlosses = prefix.glosses
+      #The stems glosses
+      @stemsGlosses = stem.glosses
+      #The suffixes glosses.
+      @suffixesGlosses = suffix.glosses
+      puts "\"#{get_lemma()}\" : stem's sizes for POS (\"#{@stemsPOS.length.to_s}\") and GLOSS (\"#{@stemsGlosses.length.to_s}\") do not match" if (@stemsPOS.length != @stemsGlosses.length and @debug)
+      #Normalize stems since some of them can contain prefixes
+      while(@stemsPOS.length>0)
+              stemPOS = @stemsPOS.slice(0)
+              stemPOS.force_encoding "UTF-8" if(stemPOS)
+              if (@stemsGlosses.length>0)
+                stemGloss = @stemsGlosses.slice(0)
+              else
+                stemGloss = nil
+              end
+              stemGloss.force_encoding "UTF-8" if(stemGloss)
+                 if(stemPOS.ends_with_suffix_set?(@@ends_with_set_for_pos_one) )
+                      @stemsPOS.slice!(0)
+                      @prefixesPOS.push(stemPOS)
+                      if (stemGloss)
+                        @stemsGlosses.slice!(0)
+                        @prefixesGlosses.push(stemGloss)
+                      end
+                 else
+                   break
+                 end
+      end
+      #Normalize stems since some of them can contain suffixes
+      while(@stemsPOS.length>0)
+              stemPOS = @stemsPOS.slice(@stemsPOS.length-1)
+              if(stemPOS)
+                stemPOS.force_encoding "UTF-8"
+              end
+              if (@stemsGlosses.length>0)
+                stemGloss = @stemsGlosses.slice(@stemsGlosses.length-1)
+              else
+                stemGloss = nil
+              end
+              if(stemGloss)
+                stemGloss.force_encoding "UTF-8"
+              end
+              if(stemPOS.ends_with_suffix_set?(@@ends_with_set_for_pos_two))
+                      @stemsPOS.slice!(@stemsPOS.length-1)
+                      @suffixesPOS.insert(0,stemPOS)
+                      if (stemGloss)
+                        @stemsGlosses.slice!(@stemsGlosses.length-1)
+                        @suffixesGlosses.insert(0,stemGloss)
+                      end
+              else
+                      break
+              end
+      end
+      #Normalization of bayon, bayona, bayoni
+      if (@stemsPOS.length > 1)
+          pos0 = @stemsPOS[0]
+          pos1 = @stemsPOS[1]
+          if(pos1=="bayon" or pos1=="bayona" or pos1=="bayoni")
+                  if (@debug)
+                    puts "Merging \""+pos1+"\" into first part of stem \"" + pos0 + "\""
+                  end
+                  array = pos0.split("/");
+                  sb = array[0] + pos1+"/"
+                  i=1
+                  while( i < array.length)
+                          sb+=array[i]
+                  end
+                  @stemsPOS.slice!(0)
+                  @stemsPOS[0] = sb
+          end
+      end
+      # Sanity check
+      if (@stemsPOS.length > 1 and @debug)
+            puts"More than one stem for " + @stemsPOS.to_string()
+      end
+  end
+  # Returns the lemma id in the stems dictionary.
+  #  * @return The lemma ID
+  #
+  def get_lemma
+    x = Regexp.compile("(_|-).*$")
+    @stem.lemma_id.sub(x,"")
+  end
+  # Returns the vocalizations of the <b>recomputed</b> prefixes in the Buckwalter transliteration system
+  # or  <b>nil</b> if there are no prefixes for the word.
+  #  * @return The vocalizations
+  #
+  def get_prefixes_vocalizations
+    vocalizations(false,@prefixesPOS,false)
+  end
+  # Returns the vocalizations of the <b>recomputed</b> prefixes in arabic
+  # or <b>nil</b> if there are no prefixes for the word.
+  #  * @return The vocalizations
+  #
+  def get_prefixes_arabic_vocalizations
+    vocalizations(true,@prefixesPOS,false)
+  end
+  # Returns the vocalization of the <b>recomputed</b> stem in the Buckwalter transliteration system
+  # or <b>nil</b> if there is no stem for the word.
+  #  * @return The vocalization
+  #
+  def get_stem_vocalization
+    vocalizations(false,@stemsPOS,true)
+  end
+  # Returns the vocalization of the <b>recomputed</b> stem in arabic
+  # or <b>nil</b> if there is no stem for the word.
+  #  * @return The vocalization
+  #
+  def get_stem_arabic_vocalization
+    vocalizations(true,@stemsPOS,true)
+  end
+  # Returns the vocalizations of the <b>recomputed</b> suffixes in the Buckwalter transliteration system
+  # or  <b>nil</b> if there are no suffixes for the word.
+  #  * @return The vocalizations
+  #
+  def get_suffixes_vocalizations
+    vocalizations(false,@suffixesPOS,false)
+  end
+  # Returns the vocalizations of the <b>recomputed</b> suffixes in arabic
+  # or <b>nil</b> if there are no suffixes for the word.
+  #  * @return The vocalizations
+  #
+  def get_suffixes_arabic_vocalizations
+    vocalizations(true,@suffixesPOS,false)
+  end
+  # Returns the vocalization of the word in the Buckwalter transliteration system.
+  #  * @return The vocalization
+  #
+  def get_word_vocalization
+    sb = ""
+    sb.force_encoding "UTF-8"
+    vocal = get_prefixes_vocalizations()
+    if(vocal!=nil)
+      sb += vocal[0].to_s
+    end
+    s =get_stem_vocalization()
+    if ( s != nil)
+      sb+=s
+    end
+    vocal =get_suffixes_vocalizations()
+    if(vocal!=nil)
+      sb += vocal[0].to_s
+    end
+    return sb
+  end
+  # Returns the vocalization of the word in arabic.
+  #  * @return The vocalization
+  #
+  def get_word_arabic_vocalization
+    sb = ""
+    sb.force_encoding "UTF-8"
+    vocal = get_prefixes_arabic_vocalizations()
+      sb += vocal[0].to_s if vocal!=nil
+    s = get_stem_arabic_vocalization()
+      sb+=s if s!= nil
+    vocal = get_suffixes_arabic_vocalizations()
+    if(vocal!=nil)
+      sb += vocal[0].to_s
+    end
+    return sb
+  end
+  # Returns the morphology of the prefix.
+  #  * @return The morphology
+  #
+  def get_prefix_morphology
+    @prefix.morphology
+  end
+  # Returns the morphology of the stem.
+  #  * @return The morphology
+  #
+  def get_stem_morphology
+    @stem.morphology
+  end
+  # Returns the morphology of the suffix.
+  #  * @return The morphology
+  #
+  def get_suffix_morphology
+    @suffix.morphology
+  end
+  # Returns the morphology of the word.
+  #  * @return The morphology
+  #
+  def get_word_morphology
+    sb = ""
+    sb.force_encoding "UTF-8"
+    if (!@prefix.morphology.empty? and @prefix.morphology != nil )
+          sb+= "\tprefix : #{@prefix.morphology}\n"
+    end
+    if (!@stem.morphology.empty? and @stem.morphology != nil)
+          sb+= "\tstem : #{@stem.morphology}\n"
+    end
+    if (!@suffix.morphology.empty? and @suffix.morphology != nil)
+          sb+= "\tsuffix : #{@suffix.morphology}\n"
+    end
+    return sb
+   end
+  # Returns the grammatical categories of the <b>recomputed</b> prefixes
+  # or <b>nil</b> if there are no prefixes for the word.
+  #  * @return The grammatical categories
+  #
+  def get_prefixes_POS
+    perform_on_POS(1,@prefixesPOS,1)
+  end
+  # Returns The vocalizations using the Buckwalter transliteration system of the  <b>recomputed</b> prefixes and their grammatical categories
+  # or <b>nil</b> if there are no prefixes for the word.
+  #  * @return The vocalizations and the grammatical categories
+  #
+  def get_prefixes_long_POS
+    perform_on_POS(2,@prefixesPOS,1)
+  end
+  # Returns The vocalizations in arabic of the <b>recomputed</b> prefixes and their grammatical categories
+  # or <b>nil</b> if there is no stem for the word.
+  #  * @return The vocalizations and the grammatical categories.
+  #
+  def get_prefixes_arabic_long_POS
+    perform_on_POS(3,@prefixesPOS,1)
+  end
+  # Returns the grammatical category of the <b>recomputed</b> stem.
+  #  * @return The grammatical category
+  #
+  def get_stem_POS
+    perform_on_POS(1,@stemsPOS,2)
+  end
+  # Returns The vocalization using the Buckwalter transliteration system of the <b>recomputed</b> stem and its grammatical category
+  # or <b>nil</b> if there is no stem for the word.
+  #  * @return The vocalizations and the grammatical categories.
+  #
+  def get_stem_long_POS
+    perform_on_POS(2,@stemsPOS,2)
+  end
+  # Returns The vocalization in arabic of the <b>recomputed</b> stem and its grammatical category
+  # or <b>nil</b> if there is no stem for the word.
+  #  * @return The vocalizations and the grammatical categories.
+  #
+  def get_stem_arabic_long_POS
+    perform_on_POS(3,@stemsPOS,2)
+  end
+  # Returns The vocalization in arabic of the <b>recomputed</b> stem and its grammatical category
+  # or <b>nil</b> if there is no stem for the word.
+  #  * @return The grammatical categories
+  #
+  def get_suffixes_POS
+    perform_on_POS(1,@suffixesPOS,3)
+  end
+  # Returns The vocalizations using the Buckwalter transliteration system of the <b>recomputed</b> stem and its grammatical category
+  # or <b>nil</b> if there is no stem for the word.
+  #  * @return The vocalizations and the grammatical categories.
+  #
+  def get_suffixes_long_POS
+    perform_on_POS(2,@suffixesPOS,3)
+  end
+  # Returns The vocalization in arabic of the <b>recomputed</b> stem and its grammatical category
+  # or <b>nil</b> if there is no stem for the word.
+  #  * @return The vocalizations and the grammatical categories.
+  #
+  def get_suffixes_arabic_long_POS
+    perform_on_POS(3,@suffixesPOS,3)
+  end
+  # Returns The vocalization of the word in the Buckwalter transliteration system and its grammatical categories.
+  #  * @return The vocalization and the grammatical categories
+  #
+  def get_word_long_POS
+    word_POS(false)
+  end
+  # Returns The vocalization of the word in arabic and its grammatical categories.
+  #  * @return The vocalization and the grammatical categories
+  #
+  def get_word_arabic_long_POS
+    word_POS(true)
+  end
+  # Returns the english glosses of the prefixes.
+  #  * @return The glosses.
+  #
+  def get_prefixes_glosses
+    if(@prefixesGlosses.empty?)
+      return nil
+    else
+      return @prefixesGlosses
+    end
+  end
+  # Returns the english gloss of the stem.
+  #  * @return The gloss.
+  #
+  def get_stem_gloss
+    if (@stemsGlosses.empty?)
+      return nil
+    end
+    if ((@stemsGlosses.length > 1) and @debug)
+      puts "More than one gloss for " + @stemsGlosses.to_s
+    end
+    #return the first anyway :-(
+    return @stemsGlosses[0]
+  end
+  # Returns the english glosses of the suffixes.
+  #  * @return The glosses.
+  #
+  def get_suffixes_glosses
+    if(@suffixesGlosses.empty?)
+      return nil
+    else
+      return @suffixesGlosses
+    end
+  end
+  # Returns the english glosses of the word.
+  #  * @return The glosses.
+  #
+  def get_word_glosses
+    sb = ""
+    sb.force_encoding "UTF-8"
+    glosses = get_prefixes_glosses()
+    if (glosses and glosses[0] != nil)
+          sb+=("\tprefix : #{glosses[0].gsub(";","/")}\n")
+    end
+    if (get_stem_gloss() != nil)
+      sb+=("\tstem : #{get_stem_gloss().gsub(";","/")}\n")
+    end
+    glosses = get_suffixes_glosses()
+    if (glosses and glosses[0] != nil)
+          sb+=("\tsuffix : #{glosses[0].gsub(";","/")}\n")
+    end
+    return sb
+  end
+  # Returns a string representation of how the word can be analyzed using the Buckwalter transliteration system for the vocalizations.
+  # * @return The representation
+  #
+  public
+  def to_s
+    ret = ""
+    ret.force_encoding "UTF-8"
+    ret = "\n SOLUTION # #{ @cnt.to_s} \n Lemma  :  #{ get_lemma()  } \n
+       Vocalized as :  \t #{get_word_vocalization()} \n
+       Morphology :  \n #{ get_word_morphology()}
+      Grammatical category :   \n
+      #{get_word_long_POS()} Glossed as :  \n
+      #{get_word_glosses()} "
+    ret
+  end
+  # Returns a string representation of how the word can be analyzed using arabic for the vocalizations..
+  #  * @return The representation
+  #
+  def to_arabized_string
+    ret = ""
+    ret.force_encoding "UTF-8"
+    ret = "\n SOLUTION # #{ @cnt.to_s} \n Lemma  :  #{ get_lemma()  } \n
+       Vocalized as :  \t #{get_word_arabic_vocalization()} \n
+       Morphology :  \n #{ get_word_morphology()}
+      Grammatical category :   \n
+      #{get_word_arabic_long_POS()} Glossed as :  \n
+      #{get_word_glosses()} "
+    ret
+  end
+  private
+  # Returns an array of vocalizations according to type specified in the given parameters
+  # * [arabic] Whether or not vocalization is for arabic
+  # * [arr] The array utilized, either of prefixes, stems, suffixes
+  # * [one] Whether or not we are manipulating single vocalization (only true for stem vocalizations, false for suffixes and prefixes)
+  #
+  def vocalizations(arabic, arr, one)
+    if (arr.empty?)
+      return nil
+    end
+    vocalizations = []
+    arr.each do |pos|
+            array = pos.split("/")
+            if(arabic)
+              sb = LatinArabicTranslator.translate(array[0])
+              sb.force_encoding "UTF-8"
+              vocalizations <<  sb
+            else
+              vocalizations <<  array[0]
+            end
+    end
+    if(one)
+      if ( (vocalizations.length > 1) and @debug)
+        puts "More than one stem for " + vocalizations.to_s
+      end
+      return vocalizations[0]
+    else
+      return vocalizations
+    end
+  end
+  # Returns an array of vocalizations according to type specified in the given parameters
+  # * [type] Specifies  the type of the function to perform, (1 for regular, 2 for long, 3 for arabic)
+  # * [arr] The array utilized, either of prefixes, stems, suffixes
+  # * [pre_stem_suff] Specifying which type of arrays are being handled (1 for prefixes, 2 for stems, 3 for suffixes)
+  #
+  def perform_on_POS(type, arr, pre_stem_suff)
+    if (arr.empty?)
+      return nil
+    end
+    temp_POS = []
+    arr.each do |pos|
+      array = pos.split("/")
+      j=1
+      if(type==1)
+        sb = ""
+      elsif(type==2)
+        sb = "#{array[0]}\t"
+      else
+        sb = "#{LatinArabicTranslator.translate(array[0])}\t"
+        sb.force_encoding "UTF-8"
+      end
+      sb <<  array[1..array.length].join(" / ")
+      temp_POS.push(sb)
+    end
+    if(pre_stem_suff==2)
+      if ((temp_POS.length > 1) and @debug)
+        puts "More than one stem for #{temp_POS.to_s}"
+      end
+      if (type ==1 and temp_POS[0].empty?)
+        puts "Empty POS for stem #{get_stem_long_POS()}"
+      end
+      #return the first anyway :-(
+      return temp_POS[0]
+    else
+      return temp_POS
+    end
+  end
+  # Returns the vocalizations and the grammatical categories
+  # * [arabic] Boolean to choose, Buckwalter transliteration system or arabic
+  #
+  def word_POS(arabic)
+    sb=""
+    if(arabic)
+      temp_POS =get_prefixes_arabic_long_POS()
+    else
+      temp_POS =get_prefixes_long_POS()
+    end
+    if (temp_POS != nil)
+              if (temp_POS[0]!=nil)
+                sb << ("\tprefix : #{temp_POS[0]}\n")
+              end
+    end
+    if(arabic)
+      s = get_stem_arabic_long_POS()
+    else
+      s = get_stem_long_POS()
+    end
+    if ( s != nil)
+      sb << ("\tstem : #{s}  \n")
+    end
+    if(arabic)
+      temp_POS =get_suffixes_arabic_long_POS()
+    else
+      temp_POS =get_suffixes_long_POS()
+    end
+    if (temp_POS != nil)
+              if (temp_POS[0]!=nil)
+                sb << ("\tsuffix : #{temp_POS[0]}\n")
+              end
+    end
+    return sb
+  end
+end
+class String
+  def ends_with_suffix_set?(ends_with_suffix_set)
+      length = self.length
+      length.times { |i|
+	      return true if ends_with_suffix_set.member?(self[i..length])
+	  }
+    return false
+  end
+ end

data/lib/raramorph/translator.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# Class For Translation
+# Author:: eSpace technologies  www.eSpace.com.eg
+# Copyright:: 2008
+class Translator
+      TABLE = {     "ہ"=>"A" , "ء"=>"A","آ"=>"A" ,"أ"=>"A","ؤ"=>"A", "إ"=>"A",
+                                  "ا"=>"C" ,
+                                  "ب"=>"E", "ة"=>"E" , "ت"=>"E" , "ث"=>"E",
+                                  "ج"=>"I" , "ح"=>"I" , "خ"=>"I" , "د"=>"I",
+                                   "ر"=>"N" ,
+                                   "ز"=>"O" , "س"=>"O" ,  "ش"=>"O" , "ص"=>"O" , "ض"=>"O" ,
+                                   "ظ"=>"U" , "ع"=>"U" , "غ"=>"U" , "ـ"=>"U" ,
+                                   "à"=>"a" , "ل"=>"a" , "â"=>"a" , "م"=>"a" , "ن"=>"a" , "ه"=>"a" ,
+                                   "ç"=>"c" ,
+                                   "è"=>"e" , "é"=>"e" , "ê"=>"e" ,  "ë"=>"e" ,
+                                   "ى"=>"i" , "ي"=>"i" , "î"=>"i" , "ï"=>"i" ,
+                                   "ٌ"=>"n" ,
+                                   "ٍ"=>"o" ,  "َ"=>"o" , "ô"=>"o" , "ُ"=>"o" , "ِ"=>"o" ,
+                                   "ù"=>"u" , "ْ"=>"u" , "û"=>"u" , "ü"=>"u" ,
+                                   "ئ"=>"AE" ,  "ٹ"=>"Sh" , "ژ"=>"Zh" , "ك"=>"ss" , "و"=>"ae" , "ڑ"=>"sh" , "‍"=>"zh" }
+  # * Translate The String
+  def translate(string)
+         result = ""
+         i = 0
+         ## IF non Utf8 Char return
+         return string unless string.length % 2  ==0
+         while i < string.length-1
+            char = string[i..i+1]
+            result+=  TABLE[char].nil? ? char : TABLE[char]
+            i+=2
+          end
+          result
+  end
+  def table(str)
+    TABLE[str]
+  end
+end

data/lib/raramorph.rb ADDED Viewed

@@ -0,0 +1,16 @@
+#Dir[File.join(File.dirname(__FILE__), 'raramorph/**/*.rb')].sort.each { |lib| require lib }
+$:.unshift File.expand_path(File.dirname(__FILE__) )
+start = Time.now
+require 'set'
+require 'stringio'
+require 'raramorph/logger'
+require 'raramorph/translator'
+require 'raramorph/arabic_latin_translator'
+require 'raramorph/latin_arabic_translator'
+require 'raramorph/in_memory_dictionary_handler'
+require 'raramorph/in_memory_solutions_handler'
+require 'raramorph/solution'
+require 'raramorph/dictionary_entry'
+require 'raramorph/raramorph'
+puts "Time Elapsed loading dictionaries= " + ( Time.now - start).to_s

data/lib/raramorph_main.rb ADDED Viewed

@@ -0,0 +1,34 @@
+# ARGV[0] # Input File Name
+# ARGV[1] # Outpute File Name
+# ARGV[2] # Verbose Default False
+# ARGV[4] # BuckWalter  Default False ( Arabic Output)
+ $:.unshift File.expand_path(File.dirname(__FILE__) )
+ if ARGV.length >= 2 and ARGV.length <= 4
+ require 'raramorph'
+ start = Time.now
+ verbose = false
+ not_arabic = true
+ verbose = true  if ARGV[2] and ARGV[2] == "-v"
+ not_arabic = false   if ARGV[3] and ARGV[3] == "-a"
+ not_arabic = false if ARGV[2] and ARGV[2] == "-a"
+ Raramorph.execute(ARGV[0] , ARGV[1] , verbose , not_arabic )
+    puts "Time Elapsed= " + ( Time.now - start).to_s
+ else
+    puts("Arabic Morphological Analyzer for Ruby")
+    puts("Ported to Ruby  by Moustafa Emara and Hany Salah El din , eSpace-technologies.(www.espace.com.eg) ,  2008.")
+    puts("Based on :")
+    puts("BUCKWALTER ARABIC MORPHOLOGICAL ANALYZER")
+    puts("This program is developed under the MIT-Licences")
+    puts("Usage :")
+    puts("")
+    puts("raraMorph inFile [inEncoding] [outFile]  [-v] [-a]")
+    puts("")
+    puts("inFile : file to be analyzed")
+    puts("inEncoding : encoding for inFile, default UTF-8")
+    puts("outFile : result file ")
+    puts("-v : verbose mode")
+    puts("-a : Aarbic Output" )
+ end