RubyGems - nlp - Versions diffs - 0.2.5 → 0.2.6 - Mend

nlp 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

data/lib/rid_category.rb CHANGED Viewed

@@ -1,18 +1,17 @@
 module NLP
-    class RIDCategory < Category
+  class RIDCategory < Category
     def primary?
       root == :PIERWOTNE
     end
     def secondary?
       root == :WTORNE
     end
     def emotions?
       root == :EMOCJE
     end
-    end
+  end
 end

data/lib/sentence.rb CHANGED Viewed

@@ -1,16 +1,24 @@
 module NLP
-    class Sentence
-        attr_reader :tokens
-        def initialize()
-            @tokens = []
-        end
+  class Sentence
-        def << tokens
-            @tokens.concat tokens
-        end
+    attr_reader :tokens
-        def words_number
-            @tokens.size
-        end
+    def initialize()
+      @tokens = []
     end
+    def << tokens
+      if tokens.is_a? Array
+        @tokens.concat tokens
+      else
+        @tokens << tokens
+      end
+      self
+    end
+    def words_number
+      @tokens.count{|t| !t.interp?}
+    end
+  end
 end

data/lib/statistic.rb ADDED Viewed

@@ -0,0 +1,55 @@
+class Statistic
+  attr_accessor :total_words, :hash
+  attr_reader :cwords, :words, :total_words, :word_count
+  def initialize
+    @word_count = 0
+    @total_words = 0
+    @scores = Hash.new { 0 }
+    @words = []
+    @cwords = Hash.new {nil}
+    @hash
+  end
+  def add(word,category)
+    @scores[category] += 1
+    @word_count += 1
+    @words.push word
+    category = category.name
+    if @cwords[category].nil?
+      @cwords[category] = []
+    end
+    @cwords[category].push word
+  end
+  def []=(key,value)
+    @hash[key] = value
+  end
+  def [](key)
+    @hash[key]
+  end
+  def category_participation(categories)
+    sorted_scores = @scores.to_a.sort_by { |result| -result[1] }
+    r = {}
+    categories.each do |cat|
+      r[cat] = percentage_distribution(sorted_scores){|c| c.send(cat.to_s+'?')}
+    end
+    r
+  end
+  private
+  def percentage_distribution scores, &block
+    sum = scores.select{|result| yield result[0]}.inject(0){|count,result| count + result[1]}
+    Float(sum)/@word_count
+  end
+end

data/lib/stdlib/ext/array.rb CHANGED Viewed

@@ -2,5 +2,12 @@ class Array
   def tail
     self[1..-1]
   end
+  def mean
+    sum=0
+    self.each{|v| sum+=v }
+    sum/self.size
+  end
 end

data/lib/stree.rb CHANGED Viewed

@@ -5,53 +5,53 @@ module NLP
     SYMBOLS = %w{* - : - / ) (}
     attr_accessor :value
     attr_accessor :subtrees
     # 0 -> *
     # 1 -> -
     # 2 -> a
     # 33 -> ź
     def initialize
-      @subtrees = Array.new( 34, nil )
+      @subtrees = Array.new(34, nil)
       @value = []
     end
-    def insert( s, value )
-      priv_insert( s.scan(/./), value )
+    def insert(s, value)
+      priv_insert(s.scan(/./), value)
     end
-    def find( s )
-      priv_find( s.scan(/./) )
+    def find(s)
+      priv_find(s.scan(/./))
     end
-  protected
+    protected
     def key( chr )
-        unless chr
-            raise ArgumentError,  "Argument chr is nil"
-        end
-        rval = ALPHABET.index(chr) || -1
-        if rval > 35
-          rval = -1 # invalid character
-        end
-       rval
+      unless chr
+        raise ArgumentError,  "Argument chr is nil"
+      end
+      rval = ALPHABET.index(chr) || -1
+      if rval > 35
+        rval = -1 # invalid character
+      end
+      rval
     end
     def priv_insert( s, value )
       if s.empty?
         @value.push value
       else
         index = key( s.first )
         subtree = if @subtrees[index] == nil
-          @subtrees[index] = SearchTree.new
-        else
-          @subtrees[index]
-        end
+                    @subtrees[index] = SearchTree.new
+                  else
+                    @subtrees[index]
+                  end
         subtree.priv_insert( s.tail, value )
       end
     end
     def priv_find( search )
       if @subtrees[0]
         @subtrees[0].value
@@ -69,17 +69,17 @@ module NLP
       end
     end
-public
-   def traverse()
-        list = []
-        yield @value
-        list.concat @subrees if @subtrees  != nil
-        loop do
-            break if list.empty?
-            node = list.shift
-            yield node.value
-            list.concat node.subtrees if node.subtrees != nil
-        end
-end
-end
+    public
+    def traverse()
+      list = []
+      yield @value
+      list.concat @subrees if @subtrees  != nil
+      loop do
+        break if list.empty?
+        node = list.shift
+        yield node.value
+        list.concat node.subtrees if node.subtrees != nil
+      end
+    end
+  end
 end

data/lib/takipi_web_service.rb CHANGED Viewed

@@ -2,50 +2,50 @@ require 'rubygems'
 require 'savon'
 class TakipiWebService
-    URL = 'http://nlp.pwr.wroc.pl/clarin/ws/takipi/'
-    WSDL_URL = URL + 'takipi.wsdl'
-    def self.request(text)
-        client  = Savon::Client.new WSDL_URL, :soap_endpoint => URL
-       # Call remote service methods
-        response =  client.tag do |soap|
-            soap.body = "<text>#{text}</text><format>TXT</format><useGuesser>true</useGuesser>"
-        end
-        response =  response.to_hash
-        token =  response[:tag_response][:tag_response][:msg]
-        status = (response[:tag_response][:tag_response][:status]).to_i
-         #checking status
-         timeout = 60
-         step = 5
-         count = 0
-         loop do
-            break if count > timeout
-            if status == 1
-                    break
-            elsif status == 2 or status == 3
-                            count += 5
-                            sleep(1)
-                            r = client.get_status do |soap|
-                                soap.body = "<token>#{token}</token>"
-                            end.to_hash
-                            status =  (r[:get_status_response][:status]).to_i
-            end
-         end
-         #geting result
-        result = client.get_result do |soap|
-            soap.body="<token>#{token}</token>"
-        end
-        response_document = result.to_hash[:get_result_response][:tag_response][:msg]
-        #transforming response to well formed xml string
-       return "<xml><chunkList>#{response_document}</chunkList></xml>"
-    end
+  URL = 'http://nlp.pwr.wroc.pl/clarin/ws/takipi/'
+  WSDL_URL = URL + 'takipi.wsdl'
+  def self.request(text)
+    client  = Savon::Client.new WSDL_URL, :soap_endpoint => URL
+    # Call remote service methods
+    response =  client.tag do |soap|
+      soap.body = "<text>#{text}</text><format>TXT</format><useGuesser>true</useGuesser>"
+    end
+    response =  response.to_hash
+    token =  response[:tag_response][:tag_response][:msg]
+    status = (response[:tag_response][:tag_response][:status]).to_i
+    #checking status
+    timeout = 60
+    step = 5
+    count = 0
+    loop do
+      break if count > timeout
+      if status == 1
+        break
+      elsif status == 2 or status == 3
+        count += 5
+        sleep(1)
+        r = client.get_status do |soap|
+          soap.body = "<token>#{token}</token>"
+        end.to_hash
+        status =  (r[:get_status_response][:status]).to_i
+      end
+    end
+    #geting result
+    result = client.get_result do |soap|
+      soap.body="<token>#{token}</token>"
+    end
+    response_document = result.to_hash[:get_result_response][:tag_response][:msg]
+    #transforming response to well formed xml string
+    return "<xml><chunkList>#{response_document}</chunkList></xml>"
+  end
 end

data/lib/text.rb CHANGED Viewed

@@ -1,25 +1,26 @@
 module NLP
-    class Text
-        attr_reader :sentences
+  class Text
+    attr_reader :sentences
-        def initialize
-            @sentences = []
-        end
+    def initialize
+      @sentences = []
+    end
-        def << sentence
-            @sentences.push sentence
-        end
+    def << sentence
+      @sentences.push sentence
+    end
-        def words_per_sentence
-            mean(@sentences.collect{|s| s.words_number})
-        end
+    def words_per_sentence
+      @sentences.collect{|s| s.words_number}.mean
+    end
-        private
-        def mean(x)
-            sum=0
-            x.each{|v| sum+=v }
-            sum/x.size
-        end
+    def flatten
+      flattened = []
+      @sentences.each{ |s| s.tokens.each{|t| flattened.push t } }
+      flattened
     end
+  end
 end

data/lib/token.rb CHANGED Viewed

@@ -1,34 +1,37 @@
 module NLP
-    class Token
-       attr_reader :orth
-       attr_reader :tags
-        def initialize(orth,tags)
-            @orth = orth
-            @tags = tags
-        end
+  class Token
+    attr_reader :orth
+    attr_reader :tags
+    def initialize(orth,tags)
+      @orth = orth
+      @tags = tags
+    end
-        def interp?
-            @tags.eql? "interp"
-        end
+    def symbol?
+      @tags.eql? "tsym"
+    end
-        def word?
-            not interp? and not number?
-        end
+    def interp?
+      @tags.eql? "interp"
+    end
-        def number?
-            @tags.include?("tnum")
-        end
+    def word?
+      not interp? and not number?
+    end
-        def integer?
-            @tags.include?("tnum:integer")
-        end
+    def number?
+      @tags.include?("tnum")
+    end
-        def float?
-            @tags.include?("tnum:frac")
-        end
+    def integer?
+      @tags.include?("tnum:integer")
+    end
+    def float?
+      @tags.include?("tnum:frac")
     end
+  end
 end

data/lib/token_scanner.rb CHANGED Viewed

@@ -1,72 +1,60 @@
 module NLP
-    class TokenScanner
-        attr_reader :text, :tokens
+  class TokenScanner
-        def initialize(text)
-            @text = text
-            @pos = 0
-            @tokens = flatten_text(@text)
-        end
+    attr_reader :text, :tokens
-        def next(type)
-            @pos+=1
+    def initialize(text)
+      @text = text
+      @pos = 0
+      @tokens = @text.flatten
+    end
-            case type
-            when :word
-                while @pos < @tokens.size and !@tokens[@pos].word?
-                    @pos+= 1
-                end
+    def next(type)
+      @pos+=1
-            when :interp
-                while @pos < @tokens.size and !@tokens[@pos].interp?
-                    @pos+= 1
-                end
-             when :number
-                while @pos < @tokens.size and !@tokens[@pos].number?
-                    @pos+= 1
-                end
-             when :alphanum
-                while @pos < @tokens.size and !@tokens[@pos].number? and !@tokens[@pos].word?
-                    @pos+= 1
-                end
-            end
+      case type
+      when :word
+        while @pos < @tokens.size and !@tokens[@pos].word?
+          @pos+= 1
         end
-        def current
-            if @pos == @tokens.size
-                nil
-            else
-                @tokens[@pos]
-            end
+      when :interp
+        while @pos < @tokens.size and !@tokens[@pos].interp?
+          @pos+= 1
         end
-        def rewind
-            @pos = 0
+      when :number
+        while @pos < @tokens.size and !@tokens[@pos].number?
+          @pos+= 1
         end
-        def index
-            @pos
+      when :alphanum
+        while @pos < @tokens.size and !@tokens[@pos].number? and !@tokens[@pos].word?
+          @pos+= 1
         end
+      end
+    end
-        def end?
-            @pos == tokens.size
-        end
+    def current
+      if @pos == @tokens.size
+        nil
+      else
+        @tokens[@pos]
+      end
+    end
-        private
+    def rewind
+      @pos = 0
+    end
-        def flatten_text(text)
-            flattened = []
-            text.sentences.each { |s| s.tokens.each {|t| flattened.push t } }
-            flattened
-        end
+    def index
+      @pos
+    end
-end
+    def end?
+      @pos == tokens.size
+    end
+  end
 end