RubyGems - tagmemics - Versions diffs - 0.0.0.beta → 0.0.2 - Mend

tagmemics 0.0.0.beta → 0.0.2

Files changed (15)

checksums.yaml +4 -4
data/config/adjectives.txt +1137 -0
data/config/adjectives.txt.bak +1136 -0
data/config/articles.txt +3 -0
data/config/conjunctions.txt +7 -0
data/config/linking_verbs.txt +28 -0
data/config/prepositions.txt +202 -0
data/config/pronouns.txt +53 -0
data/lib/tagmemics.rb +41 -24
data/lib/tagmemics/{config.rb → load_data.rb} +2 -2
data/lib/tagmemics/version.rb +3 -0
data/lib/tagmemics/word.rb +46 -56
data/lib/tagmemics/word/confidence.rb +65 -0
data/lib/tagmemics/word/wordnet.rb +38 -9
metadata +22 -7

data/lib/tagmemics/word/confidence.rb ADDED

@@ -0,0 +1,65 @@
+module Tagmemics
+  class Word
+    class << self
+      # FIXME: scan leftward for the article instead of using a hard-coded index (index - 2).
+      # Add up the weighted score of each category and derive a fraction of 10.
+      def noun_confidence(arr, index)
+        str = arr[index]
+        wordnet_prob = (WordNetMethods.possibilities(str)['noun'] / 1) * 6
+        left_neighbor_article = article_confidence(arr[index - 2]) * 2
+        subtotal = wordnet_prob + left_neighbor_article
+        subtotal / 10.0
+      end
+      def verb_confidence(str)
+        wordnet_prob = WordNetMethods.wordnet_probability(str, 'verb') * 6
+        subtotal = wordnet_prob
+        subtotal / 10.0
+      end
+      # UPDATE PARAMETERS
+      def adjective_confidence(str)
+        wordnet_prob = WordNetMethods.wordnet_probability(str, 'adjective') * 6
+        # lneighbor_adjective = 0 * 2
+        # rneighbor_verb = 0 * 2
+        subtotal = wordnet_prob
+        subtotal / 10.0
+      end
+      def part_of_speech?(constant, str, positive = false)
+        arr = []
+        constant.each do |word|
+          positive = true if word.downcase == str.downcase
+          break if positive
+        end
+        positive
+      end
+      def adverb_confidence(str)
+      end
+      def linking_verb_confidence(str)
+        part_of_speech?(Tagmemics::WordSet::LINKING_VERBS, str) ? 1.0 : 0.0
+      end
+      def article_confidence(str)
+        part_of_speech?(Tagmemics::WordSet::ARTICLES, str) ? 1.0 : 0.0
+      end
+      def preposition_confidence(str)
+        part_of_speech?(Tagmemics::WordSet::PREPOSITIONS, str) ? 1.0 : 0.0
+      end
+      def pronoun_confidence(str)
+        part_of_speech?(Tagmemics::WordSet::PRONOUNS, str) ? 1.0 : 0.0
+      end
+      def conjunction_confidence(str)
+        part_of_speech?(Tagmemics::WordSet::CONJUNCTIONS, str) ? 1.0 : 0.0
+      end
+    end
+  end
+end

data/lib/tagmemics/word/wordnet.rb CHANGED

@@ -1,10 +1,8 @@
 require 'wordnet'
 require 'facets'
-puts "you're at the right place"
-module Lexicon
-  module WordNet
+module Tagmemics
+  module WordNetMethods
     class << self
       def lex
         WordNet::Lexicon.new
@@ -26,17 +24,48 @@ module Lexicon
         parts_of_speech_frequency(word).values.reduce(:+)
       end
-      def orig_probability(word)
+      # returns hash of all possibilities for given dictionary word.
+      def possibilities(word)
         hsh = parts_of_speech_frequency(word)
         denom = total_possibilities(word)
         hsh.each { |k, v| hsh[k] = v / denom.to_f }
       end
-      def most_likely(word)
-        hsh = probability(word)
-        max = hsh.values.max
-        hsh.select { |_k, v| v == max }
+      # Most likely part of speech
+      def most_likely_pos(probability_hsh)
+        return unless probability_hsh.is_a? Hash
+        max = probability_hsh.values.max
+        probability_hsh.select { |_k, v| v == max }
+      end
+      ####
+      #
+      # DELETE ME?
+      #
+      #
+      def most_likely_probability(hsh)
+        most_likely_pos(hsh).values.reduce(:+)
+      end
+      def combine_values(hsh)
+        hsh.values.reduce(:+)
+      end
+      # Sum the probabilities of every category whose name includes the given
+      # part of speech, e.g. Adjective and Adjective Satellite are added
+      # together.
+      def wordnet_probability(word, part_of_speech)
+        hsh = possibilities(word)
+        eligibles = hsh.select { |k, _v| k.split.include? part_of_speech }
+        combine_values(eligibles) || 0.0 # return if probability is nil
+      end
+      # TODO: Not using this.  Delete?
+      def decimal_complete(hsh)
+        total = hsh.length
+        complete = hsh.count { |_k, v| v } # not nil
+        complete / total.to_f
       end
     end
   end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tagmemics
 version: !ruby/object:Gem::Version
-  version: 0.0.0.beta
+  version: 0.0.2
 platform: ruby
 authors:
 - John Mason
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-10-26 00:00:00.000000000 Z
+date: 2015-10-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: facets
@@ -122,7 +122,13 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '2.7'
-description: One day this will be great.  Until then, it will be a project.
+description: "\n    The English language is extremely complicated.  We have words
+  that can have multiple\n  parts of speech.  Natural language processing is difficult
+  because it is hard to\n  tell if a word is a noun when it could be a verb or an
+  adjective, etc.\n\n  The purpose of this project is to develop an algorithm that,
+  given a sentence string,\n  has a ranking system that detects the part of speech
+  of each word.\n\n  Why is this useful?  Because understanding the correct parts of
+  speech in a sentence\n  is the first step to teaching a robot how to read.\n    "
 email: mace2345@gmail.com
 executables: []
 extensions: []
@@ -130,10 +136,19 @@ extra_rdoc_files: []
 files:
 - README.md
 - Rakefile
+- config/adjectives.txt
+- config/adjectives.txt.bak
+- config/articles.txt
+- config/conjunctions.txt
+- config/linking_verbs.txt
+- config/prepositions.txt
+- config/pronouns.txt
 - lib/tagmemics.rb
-- lib/tagmemics/config.rb
+- lib/tagmemics/load_data.rb
 - lib/tagmemics/sentence.rb
+- lib/tagmemics/version.rb
 - lib/tagmemics/word.rb
+- lib/tagmemics/word/confidence.rb
 - lib/tagmemics/word/wordnet.rb
 homepage: http://github.com/m8ss/tagmemics
 licenses:
@@ -150,13 +165,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">"
+  - - ">="
     - !ruby/object:Gem::Version
-      version: 1.3.1
+      version: '0'
 requirements: []
 rubyforge_project:
 rubygems_version: 2.4.5
 signing_key:
 specification_version: 4
-summary: A more organized way of accessing a language.
+summary: Detect parts of speech from a sentence.
 test_files: []