RubyGems - raramorph - Versions diffs - 0.1.0 → 0.1.1 - Mend

raramorph 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

data/README +56 -0
data/lib/raramorph/arabic_latin_translator.rb +1 -5
data/lib/raramorph/dictionary_entry.rb +1 -1
data/lib/raramorph/in_memory_dictionary_handler.rb +58 -20
data/lib/raramorph/in_memory_solutions_handler.rb +1 -1
data/lib/raramorph/latin_arabic_translator.rb +1 -1
data/lib/raramorph/logger.rb +20 -0
data/lib/raramorph/raramorph.rb +53 -173
data/lib/raramorph/solution.rb +81 -164
data/lib/raramorph/translator.rb +1 -1
data/lib/raramorph.rb +3 -1
data/lib/raramorph_main.rb +11 -6
data/raramorph.gemspec +6 -4
metadata +8 -5

data/README ADDED Viewed

@@ -0,0 +1,56 @@
+== Raramorph
+By eSpace-technologies
+http://www.espace.com.eg
+http://www.espace.com.eg/docs/raramorph/index.html
+http://github.com/espace/raramorph
+http://raramorph.rubyforge.org
+== DESCRIPTION:
+Raramorph is a Ruby 1.9 gem: an intelligent port of Aramorph, based on the Buckwalter Arabic Morphological Analyzer Version 1.0.
+== Usage
+ require 'raramorph'
+ # For analyzing a file
+  Raramorph.execute(input_filename, output_filename ,verbose = false, not_arabic = true)
+ # You can also call analyze_token , tokenize and segment_word directly, as static (class) methods of the Raramorph class
+OR
+From the command line
+raramorph input_file_name output_file_name  -v -a
+  -v verbose mode ( optional )
+  -a arabic output ( optional )
+== INSTALL:
+sudo gem install raramorph
+=== Source Code =====
+http://github.com/espace/raramorph/tree/master
+== LICENSE:
+(The MIT License)
+Copyright (c) 2008 Moustafa Emara , Hany Salah el deen
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/lib/raramorph/arabic_latin_translator.rb CHANGED Viewed

@@ -2,9 +2,7 @@
 # Author:: eSpace technologies  www.eSpace.com.eg
 # Copyright:: 2008
 #
 class ArabicLatinTranslator
   # * Table used for translation from Arabic to English, i.e. ( Romanize Word )
   # * According to  Buckwalter system Dictionary
   TABLE =   { "\u0621"=> "'" , "\u0622"=> "|" , "\u0623"=> ">" , "\u0624"=> "&" , "\u0625"=> "<" , "\u0626"=> "}" ,
@@ -20,9 +18,7 @@ class ArabicLatinTranslator
   #Not suitable for morphological analysis : remove all vowels/diacritics, i.e. undo the job !
   VOWEL_REMOVER = Regexp.compile("[FNKaui~o]")
   STRIPER =  Regexp.compile("[`\\{]")
- def initilaize
- end
  # * Translate : transliterate the Arabic word into a Roman-lettered word
  # * [word] Word String To be processed

data/lib/raramorph/dictionary_entry.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 # Class For Storing Dictionary Entries
 # Author:: eSpace technologies  www.eSpace.com.eg
 # Copyright:: 2008
-#
 class DictionaryEntry
         ## Constructs a Dictionary Entry

data/lib/raramorph/in_memory_dictionary_handler.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 # Class For Storing And Loading Dictionaries
 # Author:: eSpace technologies  www.eSpace.com.eg
 # Copyright:: 2008
-#
 require 'rubygems'
 class InMemoryDictionaryHandler
@@ -25,25 +25,24 @@ class InMemoryDictionaryHandler
     ### Variables #####
       @@handler  = nil
       @@regex = Regexp.compile(".*" + "<pos>(.+?)</pos>" + ".*")
-      @@morphology_regexs=[]
-      #@@leema_starter = Regexp.compile(";; ")
-      @@morphology_regexs[0] = Regexp.compile("^(Pref-0|Suff-0)$")
-      @@morphology_regexs[1] = Regexp.compile("^F" + ".*")
-      @@morphology_regexs[2] = Regexp.compile("^IV" + ".*")
-      @@morphology_regexs[3] = Regexp.compile("^PV" + ".*")
-      @@morphology_regexs[4] = Regexp.compile("^CV" + ".*")
-      @@morphology_regexs[5] = Regexp.compile("^N" + ".*")
-      @@morphology_regexs[6] = Regexp.compile("^[A-Z]" + ".*")
-      @@morphology_regexs[7] = Regexp.compile(".*" + "iy~$")
+      @@morphology_regexs=[Regexp.compile("^(Pref-0|Suff-0)$") ,
+                           Regexp.compile("^F" + ".*") ,
+                           Regexp.compile("^IV" + ".*") ,
+                           Regexp.compile("^PV" + ".*") ,
+                           Regexp.compile("^CV" + ".*") ,
+                           Regexp.compile("^N" + ".*") ,
+                           Regexp.compile("^[A-Z]" + ".*") ,
+                           Regexp.compile(".*" + "iy~$")
+                           ]
       @@compatability_stpliter = Regexp.compile("\\s+")
-      @@vocalization_array =[]
-      @@vocalization_array[0] = "/FUNC_WORD"
-      @@vocalization_array[1] ="/VERB_IMPERFECT"
-      @@vocalization_array[2] ="/VERB_PERFECT"
-      @@vocalization_array[3] ="/VERB_IMPERATIVE"
-      @@vocalization_array[4] = "/NOUN_PROP"
-      @@vocalization_array[5] ="/NOUN"
-      @@vocalization_array[6] = "/NOUN"
+      @@vocalization_array =["/FUNC_WORD" ,
+                             "/VERB_IMPERFECT" ,
+                            "/VERB_PERFECT" ,
+                            "/VERB_IMPERATIVE" ,
+                            "/NOUN_PROP" ,
+                            "/NOUN" ,
+                            "/NOUN"
+                               ]
       @@prefixes_stems_compatibility = Set.new
     #Changed
@@ -163,6 +162,44 @@ class InMemoryDictionaryHandler
     @@suffixes = suffixes
   end
+  def analyze_word_in_dictionaries(segmented_word , word_solutions , verbose  , count) # Appends a Solution to word_solutions for every mutually compatible prefix/stem/suffix entry of segmented_word ; returns the updated count
+       # Is the prefix known ?
+       if has_prefix?(segmented_word.prefix)
+         # Is the stem known ?
+         # puts "has prefix"
+         if has_stem?(segmented_word.stem)
+          # puts "has stem"
+           # Is the suffix known ?
+           if has_suffix?(segmented_word.suffix)
+           #  puts "has suffix"
+             # Compatibility check over every prefix / stem / suffix entry combination
+              @@prefixes[segmented_word.prefix].each{|prefix|
+                @@stems[segmented_word.stem].each {|stem|
+                  # Prefix/Stem compatibility
+                    if prefixes_stems_compatible?(prefix.morphology ,stem.morphology )
+                      # puts "has A B Com"
+                      @@suffixes[segmented_word.suffix].each {|suffix|
+                       # Prefix/Suffix compatibility
+                       if prefixes_suffixes_compatible?(prefix.morphology , suffix.morphology)
+                         # puts "has A C Com"
+                          # Stem/Suffix compatibility
+                         if stems_suffixes_compatible?(stem.morphology , suffix.morphology)
+                          # puts "has  B  C COM"
+                            # All tests passed : it is a solution , numbered with the incremented count
+                            count = count + 1
+                            word_solutions << Solution.new(verbose , count , prefix , stem , suffix )
+                         end
+                       end
+                      }
+                    end
+                }
+              }
+           end
+         end
+       end
+	   return count
+  end
  private
   # * load Dictionary from files
@@ -241,7 +278,8 @@ class InMemoryDictionaryHandler
              vocalization = splited_line[1]
              morphology = splited_line[2]
              gloss_pos = splited_line[3]
-             gloss , pos = ""
+             gloss = ""
+             pos = ""
              # two ways to get the POS info
              # (1) explicitly, by extracting it from the gloss field:

data/lib/raramorph/in_memory_solutions_handler.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 #
 # Author:: eSpace technologies  www.eSpace.com.eg
 # Copyright:: 2008
-#
 class InMemorySolutionsHandler

data/lib/raramorph/latin_arabic_translator.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 # Class For Latin Arabic Transliteration
 # Author:: eSpace technologies  www.eSpace.com.eg
 # Copyright:: 2008
-#
 class LatinArabicTranslator

data/lib/raramorph/logger.rb ADDED Viewed

@@ -0,0 +1,20 @@
+class Logger # Buffers messages in memory and emits them in one go through #log. NOTE(review): same constant name as Ruby's stdlib Logger -- confirm the two are never loaded together.
+  attr_reader :verbose , :output # the values handed to the constructor, exposed read-only
+  def initialize(verbose = nil  , output = nil ) # [verbose] flag kept for callers to query ; [output] path written by #log , nil selects stdout
+     @verbose = verbose
+	 @output = output
+	 @stream = StringIO.new # every message is accumulated here until #log runs ; presumably 'stringio' is required elsewhere -- verify
+  end
+  def info string , require_verbose = false # appends string as one line to the buffer
+    @stream.puts(string) #if (  require_verbose && @verbose  || ! require_verbose ) -- NOTE(review): this guard is commented out , so require_verbose is currently ignored and every message is buffered
+  end
+  def log # emits the whole buffer : to stdout when no output path was given , otherwise to that file
+    return  puts @stream.string  if @output.nil?
+	File.open(@output , "w") { |f| # "w" mode : an existing file at that path is overwritten
+	 f.puts @stream.string }
+  end
+end

data/lib/raramorph/raramorph.rb CHANGED Viewed

@@ -1,10 +1,8 @@
-# A Ruby port of Buckwalter Arabic Morphological Analyzer Version 1.0.
-#
+# A Ruby port of the Buckwalter Arabic Morphological Analyzer Version 1.0.
 # Author:: eSpace technologies  www.eSpace.com.eg
 # Copyright:: 2008
-require 'set'
 class Raramorph
@@ -13,48 +11,14 @@ class Raramorph
   # The solutions handler.
   @@sol = InMemorySolutionsHandler.create
   # Whether or not the analyzer should output some convenience messages
-  @verbose
-  # The stream where to output the results
-  @output_stream
-  #use arabic translation or not?
-  @not_arabic
-  #Stats
-  # Lines processed
-   @lines_counter = 0
-  # Arabic tokens processed
-   @not_arabic_tokens_counter = 0
-  # Not arabic tokens processed
-   @not_arabic_tokens_counter = 0
-  # Arabic words which have been succesfully analyzed.
-  # * [key] = word
-  # * [value] = occurences
-  #
-   @found = {}
-  # Arabic words which have not been succesfully analyzed.
-  # * [key] = word
-  # * [value] = occurences
-  #
-   @not_found = {}
   # Alternative spellings list of regular expressions
-  @@alternative_spellings = []
-  @@alternative_spellings[0] = Regexp.compile(".*" + "Y'$")
-  @@alternative_spellings[1] = Regexp.compile(".*" + "y'$")
-  @@alternative_spellings[2] = Regexp.compile(".*" + "y$")
-  @@alternative_spellings[3] = Regexp.compile(".*" + "h$")
-  @@alternative_spellings[4] = Regexp.compile(".*" + "p$")
+  @@alternative_spellings = [Regexp.compile(".*" + "Y'$") ,
+                             Regexp.compile(".*" + "y'$") ,
+                             Regexp.compile(".*" + "y$") ,
+                             Regexp.compile(".*" + "h$") ,
+                             Regexp.compile(".*" + "p$") ]
   @@space_regex = Regexp.compile("\\s+")
-  def self.set_verbose(verbose) #Bolean Variable
-    @verbose = verbose
-  end
   # * Analyze and Process the file ( i.e Doing the morphological Analysis )
   # * [file_reader_in] Input File Path
   # * [output_buckwalter] whether the output in buckwalter indications ( i.e Roman letters ) or arabic letters
@@ -63,8 +27,8 @@ class Raramorph
      lines= IO.readlines(file_reader_in)
       lines.each do |line|
         @lines_counter+=1
-        if(@verbose)
-           puts "Processing line : "+ @lines_counter.to_s
+        if(@logger.verbose)
+           puts "Processing line : #{@lines_counter.to_s}"
         end
         tokens = tokenize(line)
         tokens.each do |token|
@@ -72,7 +36,7 @@ class Raramorph
         end
       end
     #rescue
-    #  @stream.puts "Can not read line " + @lines_counter.to_s
+    #  @logger.info "Can not read line " + @lines_counter.to_s
     #end
   end
@@ -102,7 +66,7 @@ class Raramorph
   def self.analyze_token(token ,  output_buckwalter) #STring  , Boolean , REturn Boolean
      #TO DO SET UP THE PRINT STREAM
      token.force_encoding "UTF-8"
-     @stream.puts "Processing token : " + "\t" + token
+     @logger.info "Processing token : " + "\t" + token
      #TODO : check accuracy
      #ignored \u0688 : ARABIC LETTER DDAL
      #ignored \u06A9 : ARABIC LETTER KEHEH
@@ -119,7 +83,7 @@ class Raramorph
           sub_tokens.each{|sub_token|
             unless  sub_token.strip == ""
               @not_arabic_tokens_counter+=1
-              @output_stream != nil ? @stream.puts("Non-Arabic : " + sub_token) : puts("Non-Arabic : " + sub_token)
+              @logger.info("Non-Arabic : #{sub_token}")
             end
           }
           return false
@@ -128,31 +92,30 @@ class Raramorph
        @not_arabic_tokens_counter+=1
        translitered = ArabicLatinTranslator.translate(token)
-       @output_stream != nil ? @stream.puts("Transliteration : " + "\t" + translitered) : puts("Transliteration : " + "\t" + translitered)
+       @logger.info("Transliteration : \t#{translitered}")
       if @found.has_key?(translitered)        #Already processed : previously found
-        @output_stream != nil  && @verbose ? @stream.puts("Token already processed.") : puts("Token already processed.")
+        @logger.info("Token already processed." , true )
         #increase reference counter
         @found[translitered]+=1
         has_solutions = true
        elsif @not_found.has_key?(translitered) #Already processed : previously not found
-        @output_stream != nil  && @verbose ? @stream.puts("Token already processed without solution.") : puts("Token already processed without solution.")
+        @logger.info("Token already processed without solution." , true )
         @not_found[translitered]+=1         #increase reference counter
         has_solutions = false
        else
-        @output_stream != nil  && @verbose ? @stream.puts("Token not yet processed.") : puts("Token not yet processed.")
+        @logger.info("Token not yet processed.", true )
         if (feed_word_solutions(translitered)) #CHANGED  #word has solutions...
           #mark word as found
           raise "There is already a key for " + translitered + " in found" if @found.has_key?(translitered)
-          @output_stream != nil  && @verbose ? @stream.puts("Token has direct solutions.") : puts("Token has direct solutions.")
+          @logger.info("Token has direct solutions." , true )
           #set reference counter to 1
           @found[translitered] = 1
           has_solutions = true
         else #word has no direct solution
            if(feed_alternative_spellings(translitered))
              alternatives_give_solutions = false
              alternatives = @@sol.get_alternative_spellings(translitered)
              alternatives.each{|alternative|
               alternatives_give_solutions =  (alternatives_give_solutions || feed_word_solutions(alternative))
@@ -160,21 +123,21 @@ class Raramorph
              if(alternatives_give_solutions)
                #consistency check
                raise "There is already a key for " + translitered + " in found" if @found.has_key?(translitered)
-               @output_stream != nil  && @verbose ? @stream.puts("Token's alternative spellings have solutions.") : puts("Token's alternative spellings have solutions.")
+               @logger.info("Token's alternative spellings have solutions." , true )
                #mark word as found set reference counter to 1
                @found[translitered] = 1
                has_solutions = true
              else
                #consistency check
                raise "There is already a key for " + translitered + " in notFound" if @not_found.has_key?(translitered)
-               @output_stream != nil  && @verbose ? @stream.puts("Token's alternative spellings have no solution.") : puts("Token's alternative spellings have no solution.")
+               @logger.info("Token's alternative spellings have no solution." , true )
                @not_found[translitered]=1
                has_solutions = false
            end
          else
            #there are no alternative
            raise "There is already a key for " + translitered + " in notFound" if @not_found.has_key?(translitered)
-           @output_stream != nil  && @verbose ? @stream.puts("Token has no solution and no alternative spellings.") : puts("Token has no solution and no alternative spellings.")
+           @logger.info("Token has no solution and no alternative spellings." , true )
            #mark word as not found and set reference counter to 1
            @not_found[translitered]=1
            has_solutions = false
@@ -184,22 +147,24 @@ class Raramorph
         #output solutions : TODO consider XML output
-        if @output_stream != nil
+        if @logger.output != nil
           if @found.has_key?(translitered)
             if @@sol.has_solutions(translitered)
-              @@sol.get_solutions(translitered).each{|solution| @stream.puts "#{output_buckwalter ? solution.to_s : solution.to_arabized_string}"}
+              @@sol.get_solutions(translitered).each{|solution| @logger.info "#{output_buckwalter ? solution.to_s : solution.to_arabized_string}"
+			  }
             end
             if @@sol.has_alternative_spellings(translitered)
-              @output_stream != nil  && @verbose ? @stream.puts("No direct solution") : puts("No direct solution")
+              @logger.info("No direct solution" , true )
               @@sol.get_alternative_spellings(translitered).each{|alternative|
-                 @output_stream != nil  && @verbose ? @stream.puts("Considering alternative spelling :" + "\t" + alternative) : puts("Considering alternative spelling :" + "\t" + alternative)
+               @logger.info("Considering alternative spelling :" + "\t#{alternative}" , true )
                  if @@sol.has_solutions(alternative)
-                   @@sol.get_solutions(alternative).each{|solution| @stream.puts "#{output_buckwalter ? solution.to_s : solution.to_arabized_string}"}
+                   @@sol.get_solutions(alternative).each{|solution| @logger.info "#{output_buckwalter ? solution.to_s : solution.to_arabized_string}"
+				   }
                  end
               }
             end
           elsif @not_found.has_key?(translitered)
-            @stream.puts "\nNo solution\n"
+            @logger.info "\nNo solution\n"
           else
             raise "#{translitered} is neither in found or notFound !"
           end
@@ -220,40 +185,7 @@ class Raramorph
      segments = segment_word(translitered) #Hash Set of Segement Words Objects
      #Brute force algorithm
      segments.each{|segmented_word|
-       #Is prefix known ?
-       if @@dict.has_prefix?(segmented_word.prefix)
-         #Is stem known ?
-         # puts "has prefix"
-         if @@dict.has_stem?(segmented_word.stem)
-          # puts "has stem"
-           #Is suffix known ?
-           if @@dict.has_suffix?(segmented_word.suffix)
-           #  puts "has suffix"
-             #Compatibility check
-              @@dict.prefixes[segmented_word.prefix].each{|prefix|
-                @@dict.stems[segmented_word.stem].each {|stem|
-                  #Prefix/Stem compatibility
-                    if @@dict.prefixes_stems_compatible?(prefix.morphology ,stem.morphology )
-                      # puts "has A B Com"
-                      @@dict.suffixes[segmented_word.suffix].each {|suffix|
-                       # Prefix/Suffix compatiblity
-                       if @@dict.prefixes_suffixes_compatible?(prefix.morphology , suffix.morphology)
-                         # puts "has A C Com"
-                          # Stems/Suffixes compatiblity
-                         if @@dict.stems_suffixes_compatible?(stem.morphology , suffix.morphology)
-                          # puts "has  B  C COM"
-                            #All tests passed : it is a solution
-                            count = count + 1
-                            word_solutions << Solution.new(@verbose , count , prefix , stem , suffix )
-                         end
-                       end
-                      }
-                    end
-                }
-              }
-           end
-         end
-       end
+	   count  = @@dict.analyze_word_in_dictionaries(segmented_word , word_solutions , @logger.verbose , count )
      }
       #Add all solutions, if any
@@ -322,37 +254,29 @@ class Raramorph
   # * Find Alternative Spellings for the translitered word
  # * [translitered] word to be processed
   def self.feed_alternative_spellings(translitered)
-            return true  if(@@sol.has_alternative_spellings(translitered))
+    return true  if(@@sol.has_alternative_spellings(translitered))
     word_alternative_spellings = Set.new
     temp = translitered
     if( temp.match(@@alternative_spellings[0]) )
       temp.gsub!(/Y/, "y")
-      if(@verbose)
-        @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-      end
+      @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
       word_alternative_spellings.add(temp)
       temp2 = temp.sub(/w/, "&")
       if(temp!=temp2)
         temp = temp2
-        if(@verbose)
-          @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-        end
+        @logger.info  "Found alternative spelling #{temp} for word #{translitered}" , true
         word_alternative_spellings.add(temp)
       end
       temp=translitered
       temp.gsub!(/Y/,"y")
       temp.sub!(/y'$/,"}")
-      if(@verbose)
-        @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-      end
+      @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
       word_alternative_spellings.add(temp)
       temp2 = temp.sub(/w/, "&")
       if(temp!=temp2)
         temp = temp2
-        if(@verbose)
-          @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-        end
+        @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
         word_alternative_spellings.add(temp)
       end
@@ -360,32 +284,24 @@ class Raramorph
       temp2 = temp.gsub(/Y/,"y")
       if(temp != temp2 )
         temp = temp2
-        if(@verbose)
-          @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-        end
+        @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
         word_alternative_spellings.add(temp)
       end
       temp2 = temp.sub(/w'/, "&")
       if(temp != temp2 )
         temp = temp2
-        if(@verbose)
-          @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-        end
+        @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
         word_alternative_spellings.add(temp)
       end
       temp =translitered
       temp.gsub!(/Y/, "y")
       temp.sub!(/y'$/, "}")
-      if(@verbose)
-        @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-      end
+      @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
       word_alternative_spellings.add(temp)
       temp2 = temp.sub(/w'/, "&")
       if(temp != temp2 )
         temp = temp2
-        if(@verbose)
-          @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-        end
+        @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
         word_alternative_spellings.add(temp)
       end
@@ -394,24 +310,18 @@ class Raramorph
       temp2 = temp.sub(/w'/, "&")
       if(temp != temp2 )
         temp = temp2
-        if(@verbose)
-          @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-        end
+        @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
         word_alternative_spellings.add(temp)
       end
       temp =translitered
       temp.gsub!(/Y/, "y")
       temp.gsub!(/y$/, "Y")
-      if(@verbose)
-        @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-      end
+      @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
       word_alternative_spellings.add(temp)
       temp2 = temp.sub(/w'/, "&")
       if(temp != temp2 )
         temp = temp2
-        if(@verbose)
-          @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-        end
+        @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
         word_alternative_spellings.add(temp)
       end
@@ -419,23 +329,17 @@ class Raramorph
       temp2 = temp.gsub(/Y/,"y")
       if(temp != temp2 )
         temp = temp2
-        if(@verbose)
-          @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-        end
+        @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
         word_alternative_spellings.add(temp)
       end
       temp2 = temp.sub(/w'/, "&")
       if(temp != temp2 )
         temp = temp2
-        if(@verbose)
-          @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-        end
+        @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
         word_alternative_spellings.add(temp)
       end
       temp.sub!(/p$/, "h")
-      if(@verbose)
-        @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-      end
+      @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
       word_alternative_spellings.add(temp)
     else
@@ -443,40 +347,30 @@ class Raramorph
       if(temp!=temp2)
         temp = temp2
         temp.gsub!(/Y/, "y")
-        if(@verbose)
-          @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-        end
+        @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
         word_alternative_spellings.add(temp)
         temp2 = temp.sub(/w'/, "&")
         if(temp != temp2 )
           temp = temp2
-          if(@verbose)
-            @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-          end
+          @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
           word_alternative_spellings.add(temp)
         end
       else
         temp2 = temp.gsub(/Y/, "y")
         if(temp != temp2)
-          if(@verbose)
-            @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-          end
+           @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
           word_alternative_spellings.add(temp)
           temp2 = temp.sub(/w'/, "&")
           if(temp != temp2 )
             temp = temp2
-            if(@verbose)
-              @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-            end
+            @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
             word_alternative_spellings.add(temp)
           end
         else
           temp2 = temp.sub(/w'/, "&")
           if(temp != temp2 )
             temp = temp2
-            if(@verbose)
-              @stream.puts "Found alternative spelling "+ temp + " for word " + translitered
-            end
+            @logger.info "Found alternative spelling #{temp} for word #{translitered}" , true
             word_alternative_spellings.add(temp)
           end
         end
@@ -494,10 +388,9 @@ class Raramorph
   # * [output_filename] Output file path
   # * [verbose] Setter for verbose
  # * [not_arabic] alias for output_buckwalter : whether the output is written in Buckwalter notation ( i.e. Roman letters ) or in Arabic letters
-  def self.execute(input_filename, output_filename ,verbose = false, not_arabic = true)
-    @output_stream = true
+  def self.execute(input_filename, output_filename ,verbose = false, not_arabic = false)
+    @logger = Logger.new(true , output_filename )
     @not_arabic = not_arabic
-    @verbose = verbose
     # Lines processed
     @lines_counter = 0
     # Arabic tokens processed
@@ -514,24 +407,11 @@ class Raramorph
     # * [value] = occurences
     #
     @not_found = {}
-    @stream = StringIO.new
     analyze(input_filename , @not_arabic)
-    File.open(output_filename , "w") do |f|
-      f.puts @stream.string
-    end
+    @logger.log
      print_stats
   end
  end
-  class SegmentedWord
-    # Class For  Storing the Data of segmented Word
-    # Author:: eSpace technologies  www.eSpace.com.eg
-    # Copyright:: 2008
-    attr_reader :prefix , :stem , :suffix
-    def initialize(prefix , stem , suffix)
-      @prefix = prefix
-      @stem = stem
-      @suffix = suffix
-    end
-  end
+ class SegmentedWord < Struct.new( :prefix , :stem , :suffix) ; end # Holds one segmentation of a word as its prefix , stem and suffix parts

data/lib/raramorph/solution.rb CHANGED Viewed

@@ -1,14 +1,47 @@
 # A class to find the solution of the word
-#
 # Author:: eSpace technologies  www.eSpace.com.eg
 # Copyright:: 2008
-#
 class Solution
   attr_reader :prefix, :stem, :suffix, :cnt
+  @@ends_with_set_for_pos_one = Set.new(["CONJ","EMPHATIC_PARTICLE","FUNC_WORD",
+                "FUT_PART","INTERJ","INTERROG_PART","IV1S","IV2MS",
+                "IV2FS","IV3MS","IV3FS","IV2D","IV2FD","IV3MD","IV3FD",
+                "IV1P","IV2MP","IV2FP","IV3MP","IV3FP","NEG_PART",
+                "PREP","RESULT_CLAUSE_PARTICLE"])
+  @@ends_with_set_for_pos_two = Set.new(["CASE_INDEF_NOM","CASE_INDEF_ACC",
+                "CASE_INDEF_ACCGEN","CASE_INDEF_GEN" ,"CASE_DEF_NOM" ,
+                "CASE_DEF_ACC" ,"CASE_DEF_ACCGEN","CASE_DEF_GEN" ,
+                "NSUFF_MASC_SG_ACC_INDEF" ,"NSUFF_FEM_SG" ,"NSUFF_MASC_DU_NOM" ,
+                "NSUFF_MASC_DU_NOM_POSS" ,"NSUFF_MASC_DU_ACCGEN" ,
+                "NSUFF_MASC_DU_ACCGEN_POSS" ,"NSUFF_FEM_DU_NOM" ,
+                "NSUFF_FEM_DU_NOM_POSS" ,"NSUFF_FEM_DU_ACCGEN" ,
+                "NSUFF_FEM_DU_ACCGEN_POSS" ,"NSUFF_MASC_PL_NOM" ,
+                "NSUFF_MASC_PL_NOM_POSS"  ,"NSUFF_MASC_PL_ACCGEN" ,
+                "NSUFF_MASC_PL_ACCGEN_POSS" ,"NSUFF_FEM_PL" ,"POSS_PRON_1S",
+                "POSS_PRON_2MS" ,"POSS_PRON_2FS" ,"POSS_PRON_3MS"  ,
+                "POSS_PRON_3FS","POSS_PRON_2D" ,"POSS_PRON_3D" ,"POSS_PRON_1P",
+                "POSS_PRON_2MP" ,"POSS_PRON_2FP" ,"POSS_PRON_3MP" ,"POSS_PRON_3FP" ,
+                "IVSUFF_DO:1S" ,"IVSUFF_DO:2MS" ,"IVSUFF_DO:2FS" ,"IVSUFF_DO:3MS" ,
+                "IVSUFF_DO:3FS" ,"IVSUFF_DO:2D" ,"IVSUFF_DO:3D" ,"IVSUFF_DO:1P" ,
+                "IVSUFF_DO:2MP" ,"IVSUFF_DO:2FP" ,"IVSUFF_DO:3MP" ,"IVSUFF_DO:3FP" ,
+                "IVSUFF_MOOD:I" ,"IVSUFF_SUBJ:2FS_MOOD:I" ,"IVSUFF_SUBJ:D_MOOD:I" ,
+                "IVSUFF_SUBJ:3D_MOOD:I" ,"IVSUFF_SUBJ:MP_MOOD:I" ,"IVSUFF_MOOD:S",
+                "IVSUFF_SUBJ:2FS_MOOD:SJ" ,"IVSUFF_SUBJ:D_MOOD:SJ","IVSUFF_SUBJ:MP_MOOD:SJ" ,
+                "IVSUFF_SUBJ:3MP_MOOD:SJ" ,"IVSUFF_SUBJ:FP" ,"PVSUFF_DO:1S" ,"PVSUFF_DO:2MS" ,
+                "PVSUFF_DO:2FS" ,"PVSUFF_DO:3MS" ,"PVSUFF_DO:3FS" ,"PVSUFF_DO:2D" ,
+                "PVSUFF_DO:3D" ,"PVSUFF_DO:1P" ,"PVSUFF_DO:2MP" ,"PVSUFF_DO:2FP" ,
+                "PVSUFF_DO:3MP" ,"PVSUFF_DO:3FP" ,"PVSUFF_SUBJ:1S" ,"PVSUFF_SUBJ:2MS" ,
+                "PVSUFF_SUBJ:2FS" ,"PVSUFF_SUBJ:3MS" ,"PVSUFF_SUBJ:3FS" ,"PVSUFF_SUBJ:2MD" ,
+                "PVSUFF_SUBJ:2FD" ,"PVSUFF_SUBJ:3MD" ,"PVSUFF_SUBJ:3FD" ,"PVSUFF_SUBJ:1P" ,
+                "PVSUFF_SUBJ:2MP" ,"PVSUFF_SUBJ:2FP" ,"PVSUFF_SUBJ:3MP" ,"PVSUFF_SUBJ:3FP" ,
+                "CVSUFF_DO:1S" ,"CVSUFF_DO:3MS" ,"CVSUFF_DO:3FS" ,"CVSUFF_DO:3D" ,
+                "CVSUFF_DO:1P" ,"CVSUFF_DO:3MP" ,"CVSUFF_DO:3FP" ,"CVSUFF_SUBJ:2MS" ,
+                "CVSUFF_SUBJ:2FS" ,"CVSUFF_SUBJ:2MP"])
   protected
   # Constructs a solution for a word. Note that the prefix, stem and suffix combination is <b>recomputed</b>
@@ -42,60 +75,35 @@ class Solution
       @stemsGlosses = stem.glosses
       #The suffixes glosses.
       @suffixesGlosses = suffix.glosses
-    if (@stemsPOS.length != @stemsGlosses.length)
-        if (@debug)
-          puts "\"" + get_lemma() + "\" : stem's sizes for POS (" + @stemsPOS.length.to_s + ") and GLOSS ("+ @stemsGlosses.length.to_s + ") do not match"
-        end
-      end
+      puts "\"#{get_lemma()}\" : stem's sizes for POS (\"#{@stemsPOS.length.to_s}\") and GLOSS (\"#{@stemsGlosses.length.to_s}\") do not match" if (@stemsPOS.length != @stemsGlosses.length and @debug)
       #Normalize stems since some of them can contain prefixes
       while(@stemsPOS.length>0)
               stemPOS = @stemsPOS.slice(0)
-              if(stemPOS)
-                stemPOS.force_encoding "UTF-8"
-              end
+              stemPOS.force_encoding "UTF-8" if(stemPOS)
               if (@stemsGlosses.length>0)
                 stemGloss = @stemsGlosses.slice(0)
               else
                 stemGloss = nil
               end
-              if(stemGloss)
-                stemGloss.force_encoding "UTF-8"
-              end
-              if (stemPOS.end_with?("CONJ") or
-                    stemPOS.end_with?("EMPHATIC_PARTICLE") or
-                    stemPOS.end_with?("FUNC_WORD") or
-                    stemPOS.end_with?("FUT_PART") or
-                    stemPOS.end_with?("INTERJ") or
-                    stemPOS.end_with?("INTERROG_PART") or
-                    stemPOS.end_with?("IV1S") or
-                    stemPOS.end_with?("IV2MS") or
-                    stemPOS.end_with?("IV2FS") or
-                    stemPOS.end_with?("IV3MS") or
-                    stemPOS.end_with?("IV3FS") or
-                    stemPOS.end_with?("IV2D") or
-                    stemPOS.end_with?("IV2FD") or
-                    stemPOS.end_with?("IV3MD") or
-                    stemPOS.end_with?("IV3FD") or
-                    stemPOS.end_with?("IV1P") or
-                    stemPOS.end_with?("IV2MP") or
-                    stemPOS.end_with?("IV2FP") or
-                    stemPOS.end_with?("IV3MP") or
-                    stemPOS.end_with?("IV3FP") or
-                    stemPOS.end_with?("NEG_PART") or
-                    stemPOS.end_with?("PREP") or
-                    stemPOS.end_with?("RESULT_CLAUSE_PARTICLE") )
+              stemGloss.force_encoding "UTF-8" if(stemGloss)
+                 if(stemPOS.ends_with_suffix_set?(@@ends_with_set_for_pos_one) )
                       @stemsPOS.slice!(0)
                       @prefixesPOS.push(stemPOS)
                       if (stemGloss)
                         @stemsGlosses.slice!(0)
                         @prefixesGlosses.push(stemGloss)
                       end
-              else
-                      break
-              end
+                 else
+                   break
+                 end
       end
       #Normalize stems since some of them can contain suffixes
@@ -112,101 +120,8 @@ class Solution
               if(stemGloss)
                 stemGloss.force_encoding "UTF-8"
               end
-              if (stemPOS.end_with?("CASE_INDEF_NOM") or
-                    stemPOS.end_with?("CASE_INDEF_ACC") or
-                    stemPOS.end_with?("CASE_INDEF_ACCGEN") or
-                    stemPOS.end_with?("CASE_INDEF_GEN") or
-                    stemPOS.end_with?("CASE_DEF_NOM") or
-                    stemPOS.end_with?("CASE_DEF_ACC") or
-                    stemPOS.end_with?("CASE_DEF_ACCGEN") or
-                    stemPOS.end_with?("CASE_DEF_GEN") or
-                    stemPOS.end_with?("NSUFF_MASC_SG_ACC_INDEF") or
-                    stemPOS.end_with?("NSUFF_FEM_SG") or
-                    stemPOS.end_with?("NSUFF_MASC_DU_NOM") or
-                    stemPOS.end_with?("NSUFF_MASC_DU_NOM_POSS") or
-                    stemPOS.end_with?("NSUFF_MASC_DU_ACCGEN") or
-                    stemPOS.end_with?("NSUFF_MASC_DU_ACCGEN_POSS") or
-                    stemPOS.end_with?("NSUFF_FEM_DU_NOM") or
-                    stemPOS.end_with?("NSUFF_FEM_DU_NOM_POSS") or
-                    stemPOS.end_with?("NSUFF_FEM_DU_ACCGEN") or
-                    stemPOS.end_with?("NSUFF_FEM_DU_ACCGEN_POSS") or
-                    stemPOS.end_with?("NSUFF_MASC_PL_NOM") or
-                    stemPOS.end_with?("NSUFF_MASC_PL_NOM_POSS") or
-                    stemPOS.end_with?("NSUFF_MASC_PL_ACCGEN") or
-                    stemPOS.end_with?("NSUFF_MASC_PL_ACCGEN_POSS") or
-                    stemPOS.end_with?("NSUFF_FEM_PL") or
-                    stemPOS.end_with?("POSS_PRON_1S") or
-                    stemPOS.end_with?("POSS_PRON_2MS") or
-                    stemPOS.end_with?("POSS_PRON_2FS") or
-                    stemPOS.end_with?("POSS_PRON_3MS") or
-                    stemPOS.end_with?("POSS_PRON_3FS") or
-                    stemPOS.end_with?("POSS_PRON_2D") or
-                    stemPOS.end_with?("POSS_PRON_3D") or
-                    stemPOS.end_with?("POSS_PRON_1P") or
-                    stemPOS.end_with?("POSS_PRON_2MP") or
-                    stemPOS.end_with?("POSS_PRON_2FP") or
-                    stemPOS.end_with?("POSS_PRON_3MP") or
-                    stemPOS.end_with?("POSS_PRON_3FP") or
-                    stemPOS.end_with?("IVSUFF_DO:1S") or
-                    stemPOS.end_with?("IVSUFF_DO:2MS") or
-                    stemPOS.end_with?("IVSUFF_DO:2FS") or
-                    stemPOS.end_with?("IVSUFF_DO:3MS") or
-                    stemPOS.end_with?("IVSUFF_DO:3FS") or
-                    stemPOS.end_with?("IVSUFF_DO:2D") or
-                    stemPOS.end_with?("IVSUFF_DO:3D") or
-                    stemPOS.end_with?("IVSUFF_DO:1P") or
-                    stemPOS.end_with?("IVSUFF_DO:2MP") or
-                    stemPOS.end_with?("IVSUFF_DO:2FP") or
-                    stemPOS.end_with?("IVSUFF_DO:3MP") or
-                    stemPOS.end_with?("IVSUFF_DO:3FP") or
-                    stemPOS.end_with?("IVSUFF_MOOD:I") or
-                    stemPOS.end_with?("IVSUFF_SUBJ:2FS_MOOD:I") or
-                    stemPOS.end_with?("IVSUFF_SUBJ:D_MOOD:I") or
-                    stemPOS.end_with?("IVSUFF_SUBJ:3D_MOOD:I") or
-                    stemPOS.end_with?("IVSUFF_SUBJ:MP_MOOD:I") or
-                    stemPOS.end_with?("IVSUFF_MOOD:S") or
-                    stemPOS.end_with?("IVSUFF_SUBJ:2FS_MOOD:SJ") or
-                    stemPOS.end_with?("IVSUFF_SUBJ:D_MOOD:SJ") or
-                    stemPOS.end_with?("IVSUFF_SUBJ:MP_MOOD:SJ") or
-                    stemPOS.end_with?("IVSUFF_SUBJ:3MP_MOOD:SJ") or
-                    stemPOS.end_with?("IVSUFF_SUBJ:FP") or
-                    stemPOS.end_with?("PVSUFF_DO:1S") or
-                    stemPOS.end_with?("PVSUFF_DO:2MS") or
-                    stemPOS.end_with?("PVSUFF_DO:2FS") or
-                    stemPOS.end_with?("PVSUFF_DO:3MS") or
-                    stemPOS.end_with?("PVSUFF_DO:3FS") or
-                    stemPOS.end_with?("PVSUFF_DO:2D") or
-                    stemPOS.end_with?("PVSUFF_DO:3D") or
-                    stemPOS.end_with?("PVSUFF_DO:1P") or
-                    stemPOS.end_with?("PVSUFF_DO:2MP") or
-                    stemPOS.end_with?("PVSUFF_DO:2FP") or
-                    stemPOS.end_with?("PVSUFF_DO:3MP") or
-                    stemPOS.end_with?("PVSUFF_DO:3FP") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:1S") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:2MS") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:2FS") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:3MS") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:3FS") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:2MD") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:2FD") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:3MD") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:3FD") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:1P") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:2MP") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:2FP") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:3MP") or
-                    stemPOS.end_with?("PVSUFF_SUBJ:3FP") or
-                    stemPOS.end_with?("CVSUFF_DO:1S") or
-                    stemPOS.end_with?("CVSUFF_DO:3MS") or
-                    stemPOS.end_with?("CVSUFF_DO:3FS") or
-                    stemPOS.end_with?("CVSUFF_DO:3D") or
-                    stemPOS.end_with?("CVSUFF_DO:1P") or
-                    stemPOS.end_with?("CVSUFF_DO:3MP") or
-                    stemPOS.end_with?("CVSUFF_DO:3FP") or
-                    stemPOS.end_with?("CVSUFF_SUBJ:2MS") or
-                    stemPOS.end_with?("CVSUFF_SUBJ:2FS") or
-                    stemPOS.end_with?("CVSUFF_SUBJ:2MP")  )
+              if(stemPOS.ends_with_suffix_set?(@@ends_with_set_for_pos_two))
                       @stemsPOS.slice!(@stemsPOS.length-1)
                       @suffixesPOS.insert(0,stemPOS)
                       if (stemGloss)
@@ -332,14 +247,10 @@ class Solution
     sb = ""
     sb.force_encoding "UTF-8"
     vocal = get_prefixes_arabic_vocalizations()
-    if(vocal!=nil)
-      sb += vocal[0].to_s
-    end
+      sb += vocal[0].to_s if vocal!=nil
-    s = get_stem_arabic_vocalization()
-    if ( s != nil)
-      sb+=s
-    end
+    s = get_stem_arabic_vocalization()
+      sb+=s if s!= nil
     vocal = get_suffixes_arabic_vocalizations()
     if(vocal!=nil)
       sb += vocal[0].to_s
@@ -376,13 +287,13 @@ class Solution
     sb = ""
     sb.force_encoding "UTF-8"
     if (!@prefix.morphology.empty? and @prefix.morphology != nil )
-          sb+= "\t" + "prefix : " + @prefix.morphology + "\n"
+          sb+= "\tprefix : #{@prefix.morphology}\n"
     end
     if (!@stem.morphology.empty? and @stem.morphology != nil)
-          sb+= "\t" + "stem : " + @stem.morphology + "\n"
+          sb+= "\tstem : #{@stem.morphology}\n"
     end
     if (!@suffix.morphology.empty? and @suffix.morphology != nil)
-          sb+= "\t" + "suffix : " + @suffix.morphology + "\n"
+          sb+= "\tsuffix : #{@suffix.morphology}\n"
     end
     return sb
    end
@@ -517,14 +428,14 @@ class Solution
     sb.force_encoding "UTF-8"
     glosses = get_prefixes_glosses()
     if (glosses and glosses[0] != nil)
-          sb+=("\t" + "prefix : " + glosses[0].gsub(";","/") + "\n")
+          sb+=("\tprefix : #{glosses[0].gsub(";","/")}\n")
     end
     if (get_stem_gloss() != nil)
-      sb+=("\t" + "stem : " +get_stem_gloss().gsub(";","/") + "\n")
+      sb+=("\tstem : #{get_stem_gloss().gsub(";","/")}\n")
     end
     glosses = get_suffixes_glosses()
     if (glosses and glosses[0] != nil)
-          sb+=("\t" + "suffix : " + glosses[0].gsub(";","/") + "\n")
+          sb+=("\tsuffix : #{glosses[0].gsub(";","/")}\n")
     end
     return sb
   end
@@ -603,32 +514,26 @@ class Solution
     end
     temp_POS = []
     arr.each do |pos|
-      array = pos.split("/");
+      array = pos.split("/")
       j=1
       if(type==1)
         sb = ""
       elsif(type==2)
-        sb = array[0] + "\t"
+        sb = "#{array[0]}\t"
       else
-        sb = LatinArabicTranslator.translate(array[0]) + "\t"
+        sb = "#{LatinArabicTranslator.translate(array[0])}\t"
         sb.force_encoding "UTF-8"
       end
-      while( j < array.length)
-        if (j > 1)
-          sb+=" / "
-        end
-        sb+=array[j]
-        j+=1
-      end
+      sb <<  array[1..array.length].join(" / ")
       temp_POS.push(sb)
     end
     if(pre_stem_suff==2)
       if ((temp_POS.length > 1) and @debug)
-        puts "More than one stem for " + temp_POS.to_s
+        puts "More than one stem for #{temp_POS.to_s}"
       end
       if (type ==1 and temp_POS[0].empty?)
-        puts "Empty POS for stem " + get_stem_long_POS()
+        puts "Empty POS for stem #{get_stem_long_POS()}"
       end
       #return the first anyway :-(
       return temp_POS[0]
@@ -649,7 +554,7 @@ class Solution
     end
     if (temp_POS != nil)
               if (temp_POS[0]!=nil)
-                sb+=("\t" + "prefix : " + temp_POS[0] + "\n")
+                sb << ("\tprefix : #{temp_POS[0]}\n")
               end
     end
     if(arabic)
@@ -658,7 +563,7 @@ class Solution
       s = get_stem_long_POS()
     end
     if ( s != nil)
-      sb+=("\t" + "stem : " + s + "\n")
+      sb << ("\tstem : #{s}  \n")
     end
     if(arabic)
       temp_POS =get_suffixes_arabic_long_POS()
@@ -667,9 +572,21 @@ class Solution
     end
     if (temp_POS != nil)
               if (temp_POS[0]!=nil)
-                sb+=("\t" + "suffix : " + temp_POS[0] + "\n")
+                sb << ("\tsuffix : #{temp_POS[0]}\n")
               end
     end
     return sb
   end
 end
# Review annotation (these '#' lines are commentary, not part of the released diff):
# the added lines below reopen the core String class to add one predicate, which
# the Solution hunks above call with @@ends_with_set_for_pos_one and
# @@ends_with_set_for_pos_two in place of the removed end_with? chains.
+class String
# ends_with_suffix_set?(ends_with_suffix_set)
#   ends_with_suffix_set - collection that is queried with member?.
#   Returns true as soon as some trailing slice of the receiver is a member of
#   the collection, and false otherwise. An empty receiver always yields false
#   because the loop body never runs.
+  def ends_with_suffix_set?(ends_with_suffix_set)
+      length = self.length
# Walks start index i from 0 (the whole string) to the last character. The range
# end `length` is one past the final index, so every slice extends to the end of
# the string.
+      length.times { |i|
+	      return true if ends_with_suffix_set.member?(self[i..length])
+	  }
# No trailing slice was found in the collection.
+    return false
+  end
+ end

data/lib/raramorph/translator.rb CHANGED Viewed

@@ -24,7 +24,7 @@ class Translator
   def translate(string)
          result = ""
          i = 0
-         ## IF non Utf8 Char REturn
+         ## IF non Utf8 Char return
          return string unless string.length % 2  ==0
          while i < string.length-1
             char = string[i..i+1]

data/lib/raramorph.rb CHANGED Viewed

@@ -1,8 +1,10 @@
 #Dir[File.join(File.dirname(__FILE__), 'raramorph/**/*.rb')].sort.each { |lib| require lib }
 $:.unshift File.expand_path(File.dirname(__FILE__) )
 start = Time.now
 require 'set'
-require 'stringio'
+require 'stringio'
+require 'raramorph/logger'
 require 'raramorph/translator'
 require 'raramorph/arabic_latin_translator'
 require 'raramorph/latin_arabic_translator'

data/lib/raramorph_main.rb CHANGED Viewed

@@ -3,26 +3,31 @@
 # ARGV[2] # Verbose Default False
 # ARGV[4] # BuckWalter  Default False ( Arabic Output)
  $:.unshift File.expand_path(File.dirname(__FILE__) )
- if ARGV.length > 2 and ARGV.length <= 4
+ if ARGV.length >= 2 and ARGV.length <= 4
  require 'raramorph'
  start = Time.now
- Raramorph.execute(ARGV[0] , ARGV[1] , ARGV[2] , ARGV[3] )
+ verbose = false
+ not_arabic = true
+ verbose = true  if ARGV[2] and ARGV[2] == "-v"
+ not_arabic = false   if ARGV[3] and ARGV[3] == "-a"
+ not_arabic = false if ARGV[2] and ARGV[2] == "-a"
+ Raramorph.execute(ARGV[0] , ARGV[1] , verbose , not_arabic )
     puts "Time Elapsed= " + ( Time.now - start).to_s
  else
     puts("Arabic Morphological Analyzer for Ruby")
     puts("Ported to Ruby  by Moustafa Emara and Hany Salah El din , eSpace-technologies.(www.espace.com.eg) ,  2008.")
     puts("Based on :")
     puts("BUCKWALTER ARABIC MORPHOLOGICAL ANALYZER")
-    puts("This program is developed under the Ruby-Licences")
+    puts("This program is developed under the MIT-Licences")
     puts("Usage :")
     puts("")
-    puts("RaraMorph inFile [inEncoding] [outFile] [outEncoding] [-v]")
+    puts("raraMorph inFile [inEncoding] [outFile]  [-v] [-a]")
     puts("")
     puts("inFile : file to be analyzed")
     puts("inEncoding : encoding for inFile, default UTF-8")
-    puts("outFile : result file, default console")
-    puts("outEncoding : encoding for outFile, if not specified use Buckwalter transliteration with system's file.encoding")
+    puts("outFile : result file ")
     puts("-v : verbose mode")
+    puts("-a : Aarbic Output" )
  end

data/raramorph.gemspec CHANGED Viewed

@@ -1,8 +1,8 @@
 Gem::Specification.new do |s|
   s.name     = "raramorph"
-  s.version  = "0.1.0"
+  s.version  = "0.1.1"
   s.date     = "2008-09-06"
-  s.summary  = "Raramorph is a ruby gem for making morphological analysis and arabic indexing built using Ruby at eSpace-technologies ( www.espace.com.eg )"
+  s.summary  = "Raramorph is a ruby gem for making morphological analysis and arabic indexing built using Ruby at eSpace-technologies ( www.espace.com.eg ) "
   s.email    = "moustafa.emara@espace.com.eg"
   s.homepage = "http://github.com/espace/raramorph"
   s.description = "Raramorph is a ruby gem for making morphological analysis and arabic indexing built using Ruby at eSpace-technologies ( www.espace.com.eg )"
@@ -20,6 +20,7 @@ Gem::Specification.new do |s|
 		"lib/raramorph/arabic_latin_translator.rb",
 		"lib/raramorph/latin_arabic_translator.rb",
 		"lib/raramorph/in_memory_dictionary_handler.rb",
+		"lib/raramorph/logger.rb",
 		"lib/dictionaries/dictPrefixes",
 		"lib/dictionaries/dictStems",
 		"lib/dictionaries/dictSuffixes",
@@ -34,7 +35,8 @@ Gem::Specification.new do |s|
   s.executables = %w(raramorph)
   s.required_ruby_version = '>= 1.9'
   s.bindir = "bin"
-  #s.rdoc_options = ["--main", "README"]
-  #s.extra_rdoc_files = ["README"]
+  s.rdoc_options = ["--main", "README"]
+  s.extra_rdoc_files = ["README"]
   #s.extensions << "ext/extconf.rb"
 end

metadata CHANGED Viewed

@@ -3,7 +3,7 @@ rubygems_version: 0.9.4
 specification_version: 1
 name: raramorph
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 date: 2008-09-06 00:00:00 +02:00
 summary: Raramorph is a ruby gem for making morphological analysis and arabic indexing built using Ruby at eSpace-technologies ( www.espace.com.eg )
 require_paths:
@@ -39,6 +39,7 @@ files:
 - lib/raramorph/translator.rb
 - lib/raramorph/arabic_latin_translator.rb
 - lib/raramorph/latin_arabic_translator.rb
+- lib/raramorph/logger.rb
 - lib/dictionaries/dictPrefixes
 - lib/dictionaries/dictStems
 - lib/dictionaries/dictSuffixes
@@ -49,12 +50,14 @@ files:
 - lib/raramorph.rb
 - lib/raramorph_main.rb
 - lib/test_input/UTF-8.txt
+- README
 test_files: []
-rdoc_options: []
-extra_rdoc_files: []
+rdoc_options:
+- --main
+- README
+extra_rdoc_files:
+- README
 executables:
 - raramorph
 extensions: []