RubyGems - nlp - Versions diffs - 0.2.5 → 0.2.6 - Mend

nlp 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

data/lib/liwc_analyzer.rb CHANGED

@@ -1,97 +1,74 @@
 module NLP
-class LIWCAnalyzer < Analyzer
-    def initialize( category_file, restore = true )
-        state_file = File.expand_path(Analyzer::CACHE_DIR+'.liwc')
-        if restore
-           @dictionary = Dictionary.restore(state_file)
-        else
-            @dictionary = Dictionary.new
-            @dictionary.load_categories( category_file, :rid => false )
-            @dictionary.store(state_file)
-        end
+  class LIWCAnalyzer < Analyzer
+    def initialize(dicts)
+      @dictionary = Dictionary.new(:liwc)
     end
-        def analyze(scanner)
-	    results = {
-                :word_count => 0,
-                :word_total => 0,
-                :scores => Hash.new { 0 },
-                :words => [],
-                :cwords => Hash.new { nil },
-                :long_words => [],
-                :zaimki => [],
-		:zaimki1 => [],
-		:zaimki2 => [],
-		:zaimki3 => [],
-                :przyimki => [],
-                :numbers => [],
-                :emotion => [],
-                :social => [],
-                :personal => [],
-                :posemotion => [],
-                :negemotion => [],
-                :wulgar => [],
-                :cognitive => []
-              }
-             while token = scanner.current
-                word = token.lemat
-                categories = @dictionary.find( word.gsub( /[^\w-]/, "" ) )
-                unless categories.nil?
-                    categories.each do |category|
-                       puts "Znalazłem słowo #{word} : #{category} root: #{category.root}"
-                       token.category = category
-                        results[:scores][category] = results[:scores][category] + 1
-                        if results[:cwords][category.name].nil?
-                            results[:cwords][category.name] = []
-                        end
-                        results[:cwords][category.name].push token.orth
-                        results[:emotion].push token.orth if token.emotion?
-                        results[:social].push token.orth if token.social?
-                        results[:personal].push token.orth if token.personal?
-                        results[:wulgar].push token.orth if token.bad_word?
-                        results[:cognitive].push token.orth if token.cognitive?
-                        results[:posemotion].push token.orth if token.positive_emotion?
-                        results[:negemotion].push token.orth if token.negative_emotion?
-                        results[:word_count] += 1
-                        results[:words].push word
-                    end
-                end
-                #words longer than 9
-                results[:long_words].push word if word.jlength > 9
-		if token.zaimek?
-                	results[:zaimki].push word
-			results[:zaimki1].push token.orth if word === 'ja' or word === 'my'
-			results[:zaimki2].push token.orth if word === 'ty' or word === 'wy'
-			results[:zaimki3].push token.orth if word === 'on'
-		end
-                results[:przyimki].push word if token.przyimek?
-                results[:numbers].push token.orth if token.number? or token.liczebnik?
-                results[:word_total] += 1
-                scanner.next(:alphanum)
-             end
-             results
-	end
-end
+    def analyze(scanner)
+      results = Statistic.new
+      results.hash = {
+        :long_words => [],
+        :zaimki => [],
+        :zaimki1 => [],
+        :zaimki2 => [],
+        :zaimki3 => [],
+        :przyimki => [],
+        :numbers => [],
+        :emotion => [],
+        :social => [],
+        :personal => [],
+        :posemotion => [],
+        :negemotion => [],
+        :wulgar => [],
+        :cognitive => []
+      }
+      while token = scanner.current
+        word = token.lemat
+        categories = @dictionary.find(word.gsub( /[^\w-]/, "" ))
+        unless categories.nil?
+          categories.each do |category|
+            puts "Znalazłem słowo #{word} : #{category} root: #{category.root}"
+            token.category = category
+            results.add(word,category)
+            results[:emotion].push token.orth if token.emotion?
+            results[:social].push token.orth if token.social?
+            results[:personal].push token.orth if token.personal?
+            results[:wulgar].push token.orth if token.bad_word?
+            results[:cognitive].push token.orth if token.cognitive?
+            results[:posemotion].push token.orth if token.positive_emotion?
+            results[:negemotion].push token.orth if token.negative_emotion?
+          end
+        end
+        #words longer than 10
+        results[:long_words].push word if word.jlength > 10
+        if token.zaimek?
+          results[:zaimki].push word
+          results[:zaimki1].push token.orth if word === 'ja' or word === 'my'
+          results[:zaimki2].push token.orth if word === 'ty' or word === 'wy'
+          results[:zaimki3].push token.orth if word === 'on'
+        end
+        results[:przyimki].push word if token.przyimek?
+        results[:numbers].push token.orth if token.number? or token.liczebnik?
+        results.total_words += 1
+        scanner.next(:alphanum)
+      end
+      results
+    end
+  end
 end

data/lib/liwc_category.rb CHANGED

@@ -1,62 +1,61 @@
 module NLP
-    class LIWCCategory < Category
+  class LIWCCategory < Category
-	#primary categories
-       def linguistic?
-          root == :PIERWOTNE
-        end
-        def psychological?
-          root == :PROCESY_PSYCHOLOGICZNE
-        end
+    #primary categories
-        def relative?
-            root === :RELATYWNOSC
-        end
-        def personal?
-          root == :OSOBISTE
-        end
+    def linguistic?
+      root == :PIERWOTNE
+    end
+    def psychological?
+      root == :PROCESY_PSYCHOLOGICZNE
+    end
+    def relative?
+      root === :RELATYWNOSC
+    end
+    def personal?
+      root == :OSOBISTE
+    end
-        #second categories
-        def emotion?
-            path.include? 'EMOCJE'
+    #second categories
-        end
+    def emotion?
+      path.include? 'EMOCJE'
-        def positive_emotion?
-             path.include? 'POZYTYWNE_EMOCJE'
-        end
+    end
-        def negative_emotion?
-            path.include? 'NEGATYWNE_EMOCJE'
+    def positive_emotion?
+      path.include? 'POZYTYWNE_EMOCJE'
-        end
+    end
-        def cognitive?
-            path.include? 'KOGNITYWNE_PROCESY'
+    def negative_emotion?
+      path.include? 'NEGATYWNE_EMOCJE'
-        end
+    end
-        def sense?
-            path.include? 'ZMYSLY'
-        end
+    def cognitive?
+      path.include? 'KOGNITYWNE_PROCESY'
-        def social?
-            path.include? 'SOCIAL'
+    end
-        end
+    def sense?
+      path.include? 'ZMYSLY'
+    end
-        def bad_word?
-            path.include? 'WULGAR'
-        end
+    def social?
+      path.include? 'SOCIAL'
+    end
+    def bad_word?
+      path.include? 'WULGAR'
     end
+  end
 end

data/lib/meaningable.rb CHANGED

@@ -1,76 +1,69 @@
 module Meaningable
+  #LIWC
+  #primary categories
-#LIWC
-    #primary categories
-       def linguistic?
-          category.root == :PIERWOTNE
-        end
-        def psychological?
-          category.root == :PROCESY_PSYCHOLOGICZNE
-        end
+  def linguistic?
+    category.root == :PIERWOTNE
+  end
-        def relative?
-            category.root === :RELATYWNOSC
-        end
-        def personal?
-          category.root == :OSOBISTE
-        end
+  def psychological?
+    category.root == :PROCESY_PSYCHOLOGICZNE
+  end
-        #second categories
-        def emotion?
-            category.path.include? 'EMOCJE'
-        end
+  def relative?
+    category.root === :RELATYWNOSC
+  end
-        def positive_emotion?
-             category.path.include? 'POZYTYWNE_EMOCJE'
-        end
+  def personal?
+    category.root == :OSOBISTE
+  end
-        def negative_emotion?
-            category.path.include? 'NEGATYWNE_EMOCJE'
+  #second categories
-        end
+  def emotion?
+    category.path.include? 'EMOCJE'
-        def cognitive?
-            category.path.include? 'KOGNITYWNE_PROCESY'
+  end
-        end
+  def positive_emotion?
+    category.path.include? 'POZYTYWNE_EMOCJE'
-        def sense?
-            category.path.include? 'ZMYSLY'
-        end
+  end
-        def social?
-            category.path.include? 'SOCIAL'
+  def negative_emotion?
+    category.path.include? 'NEGATYWNE_EMOCJE'
-        end
+  end
-        def bad_word?
-            category.path.include? 'WULGAR'
-        end
+  def cognitive?
+    category.path.include? 'KOGNITYWNE_PROCESY'
+  end
+  def sense?
+    category.path.include? 'ZMYSLY'
+  end
+  def social?
+    category.path.include? 'SOCIAL'
+  end
-#SEMANTIC
-	def synonym?(other)
-	end
+  def bad_word?
+    category.path.include? 'WULGAR'
+  end
-	def synonyms
-	end
+  #SEMANTIC
+  def synonym?(other)
+  end
+  def synonyms
+  end
 end

data/lib/nlp.rb CHANGED

@@ -1,4 +1,14 @@
+module  NLP
+   TAKIPI_XML_FILE = "/tmp/output.xml"
+   DICTIONARY_CACHE_DIR = "~/"
+end
 require 'stdlib/ext/array'
 require 'stdlib/ext/string.rb'
 require 'analyzer'

data/lib/rid_analyzer.rb CHANGED

@@ -1,74 +1,10 @@
 module NLP
-    class  RIDAnalyzer < NLP::Analyzer
-	def initialize( category_file, restore = true )
-        state_file = File.expand_path(Analyzer::CACHE_DIR+'.rid')
-        if restore
-           @dictionary = Dictionary.restore(state_file)
-        else
-            @dictionary = Dictionary.new
-            @dictionary.load_categories( category_file, :rid => true )
-            @dictionary.store(state_file)
-        end
-    end
-        def analyze(scanner)
-             results = {
-                :word_count => 0,
-                :word_total => 0,
-                :scores => Hash.new { 0 },
-                :words => [],
-                :cwords => Hash.new { nil }
-              }
-             while token = scanner.current
-                word = token.lemat
-                categories = @dictionary.find( word.gsub( /[^\w-]/, "" ) )
-                unless categories.nil?
-                    categories.each do |category|
-                       puts "Znalazłem słowo #{word} : #{category} root: #{category.root}"
-                        results[:scores][category] = results[:scores][category] + 1
-                        category = category.name
-                        if results[:cwords][category].nil?
-                            results[:cwords][category] = []
-                        end
-                        results[:cwords][category].push word
-                        results[:word_count] += 1
-                        results[:words].push word
-                    end
-                end
-                results[:word_total] += 1
-                scanner.next(:word)
-             end
-              results[:sorted_scores] = results[:scores].to_a.sort_by { |result| -result[1] }
-                p primary_sum = results[:sorted_scores].select { |result| result[0].primary? }.inject( 0 ) { |count,result| count + result[1] }
-                p secondary_sum = results[:sorted_scores].select { |result| result[0].secondary? }.inject( 0 ) { |count,result| count + result[1] }
-                p emotion_sum = results[:sorted_scores].select { |result| result[0].emotions? }.inject( 0 ) { |count,result| count + result[1] }
-              results[:classes] = {
-                :primary => Float(primary_sum) / results[:word_count],
-                :secondary => Float(secondary_sum) / results[:word_count],
-                :emotions => Float(emotion_sum) / results[:word_count]
-              }
-              results
-        end
+  class  RIDAnalyzer < Analyzer
+    def initialize
+      @dictionary = Dictionary.new(:rid)
     end
+  end
 end