RubyGems - tagmemics - Versions diffs - 0.0.0.beta → 0.0.2 - Mend

tagmemics 0.0.0.beta → 0.0.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

checksums.yaml +4 -4
data/config/adjectives.txt +1137 -0
data/config/adjectives.txt.bak +1136 -0
data/config/articles.txt +3 -0
data/config/conjunctions.txt +7 -0
data/config/linking_verbs.txt +28 -0
data/config/prepositions.txt +202 -0
data/config/pronouns.txt +53 -0
data/lib/tagmemics.rb +41 -24
data/lib/tagmemics/{config.rb → load_data.rb} +2 -2
data/lib/tagmemics/version.rb +3 -0
data/lib/tagmemics/word.rb +46 -56
data/lib/tagmemics/word/confidence.rb +65 -0
data/lib/tagmemics/word/wordnet.rb +38 -9
metadata +22 -7

data/config/articles.txt ADDED

@@ -0,0 +1,3 @@
+a
+an
+the

data/config/conjunctions.txt ADDED

@@ -0,0 +1,7 @@
+for
+and
+nor
+but
+or
+yet
+so

data/config/linking_verbs.txt ADDED

@@ -0,0 +1,28 @@
+Be
+Am
+Is
+Are
+Was
+Were
+Has become
+Could have come
+Shall be
+Shall have been
+Have appeared
+Should have appeared
+Will be
+Will have been
+Had seemed
+Should have been
+Has been
+Have been
+Had been
+Can be
+May be
+Might be
+Should be
+Could be
+Become
+Would be
+Appear
+Seem

data/config/prepositions.txt ADDED

@@ -0,0 +1,202 @@
+'gainst
+'mongst
+'neath
+'twixt
+abaft
+abeam
+aboard
+about
+above
+absent
+according to
+across
+afore
+after
+against
+ago
+ahead of
+along
+alongside
+amid
+amidst
+among
+amongst
+anenst
+anent
+anti
+apart
+apart from
+apropos
+apud
+around
+as
+as far as
+as for
+as of
+as opposed to
+as per
+as regards
+as soon as
+as well as
+aside
+aside from
+astern of
+astride
+at
+at the behest of
+athwart
+atop
+away
+ayond
+ayont
+back to
+barring
+because of
+before
+behind
+behither
+below
+beneath
+beside
+besides
+between
+betwixen
+betwixt
+beyond
+biforn
+but
+by
+by means of
+by virtue of
+chez
+circa
+close to
+concerning
+considering
+contra
+cum
+despite
+down
+due to
+during
+ere
+except
+except for
+excluding
+failing
+far from
+following
+for
+for the sake of
+forby
+forenenst
+fornenst
+fornent
+from
+fromward
+froward
+frowards
+gainst
+given
+hence
+in
+in accordance with
+in addition to
+in case of
+in front of
+in lieu of
+in order to
+in place of
+in point of
+in re
+in spite of
+in to
+including
+inside
+inside of
+inside out
+instead of
+into
+left of
+like
+mid
+midst
+minus
+modulo
+near
+near to
+neath
+next
+next to
+notwithstanding
+o'
+of
+off
+on
+on account of
+on behalf of
+on to
+on top of
+onto
+opposite
+opposite of
+opposite to
+out
+out from
+out of
+outside
+outside of
+outwith
+over
+overthwart
+owing to
+pace
+past
+per
+plus
+prior to
+pro
+pursuant to
+qua
+rather than
+re
+regarding
+regardless of
+right of
+round
+sans
+save
+since
+subsequent to
+such as
+than
+thanks to
+that of
+through
+throughout
+till
+times
+to
+tofore
+toforn
+toward
+towards
+under
+underneath
+unlike
+until
+unto
+up
+up to
+upon
+versus
+via
+vice
+with
+with a view to
+with regard to
+with respect to
+withal
+within
+without
+worth

data/config/pronouns.txt ADDED

@@ -0,0 +1,53 @@
+I
+he
+her
+hers
+herself
+him
+himself
+his
+hisself
+it
+its
+itself
+me
+mine
+my
+myself
+one
+one's
+oneself
+our
+ours
+ourself
+ourselves
+she
+thee
+their
+theirs
+theirself
+theirselves
+them
+themself
+themselves
+they
+thine
+thou
+thy
+thyself
+us
+we
+who
+whom
+whomself
+whose
+whoself
+y'all
+ye
+you
+you all
+your
+yours
+yourself
+yourselves
+youse

data/lib/tagmemics.rb CHANGED

@@ -1,35 +1,52 @@
 require_relative './tagmemics/word'
 require_relative './tagmemics/sentence'
+require_relative './tagmemics/load_data'
-module Lexicon
+module Tagmemics
   def self.parse(str)
-    ParsedSentence.new(str)
+    WordSet.new(str)
   end
-end
-# The output of Lexicon.parse
-class ParsedSentence
-  attr_accessor :nouns, :verbs, :articles, :adjectives, :adverbs,
-                :prepositions, :conjunctions, :pronouns
+  # The output of Tagmemics.parse
+  class WordSet
+    ARTICLES = %w(the an a)
+    CONJUNCTIONS = %w(for and nor but or yet so )
+    LINKING_VERBS = LoadData.contents_to_a('linking_verbs')
+    PRONOUNS = LoadData.contents_to_a('pronouns')
+    PREPOSITIONS = LoadData.contents_to_a('prepositions')
-  def initialize(str)
-    @str = str
-  end
+    attr_accessor :nouns, :verbs, :articles, :adjectives, :adverbs,
+                  :prepositions, :conjunctions, :pronouns, :collection
-  def sentence_to_array(sentence)
-    sentence.split(/\W/)
-  end
+    def initialize(str)
+      @collection = []
+      arr = WordSet.sentence_to_array(str)
+      arr.each { |word| @collection << Word.new(word) }
+      # @set = WordSet.start_hash(WordSet.sentence_to_array(str))
+    end
+    class << self
+      include LoadData
+      # Will probably want to use punctuation in the future.
+      # For now, this removes it.
+      def sentence_to_array(sentence)
+        sentence.split(/\s+|\W+\z/)
+      end
-  def start_hash(arr)
-    arr.map do |word|
-      result =
-        case
-        when part_of_speech(ARTICLES, word).any? then :article
-        when part_of_speech(CONJUNCTIONS, word).any? then :conjunction
-        when part_of_speech(PRONOUNS, word).any? then :pronoun
-        end
-      [word, result]
-    end.to_h
+      # Moved part of speech.  This will not work right now.
+      # Probably need to delete this.
+      def start_hash(arr)
+        arr.map do |word|
+          result =
+            case
+            when part_of_speech(ARTICLES, word).any? then :article
+            when part_of_speech(CONJUNCTIONS, word).any? then :conjunction
+            when part_of_speech(PRONOUNS, word).any? then :pronoun
+            end
+          [word, result]
+        end.to_h
+      end
+    end
   end
 end

data/lib/tagmemics/{config.rb → load_data.rb} RENAMED

@@ -1,5 +1,5 @@
 # Retrieves data from config folder to save to constants.
-module Config
+module LoadData
   def self.config_path
     File.join(File.dirname(__FILE__), '../../config')
   end
@@ -24,7 +24,7 @@ module Config
     page = agent.get(uri)
     destination = "./config/#{part_of_speech}.txt"
     target = page.search(css_selector)
-    regx = /[^'a-zA-Z\s]/ # anything beside letters, apostrophe or space
+    regx = /[^\047a-zA-Z\s]/ # \047 is an apostrophe
     arr = []
     target.each do |x|

data/lib/tagmemics/version.rb ADDED

@@ -0,0 +1,3 @@
+module Tagmemics
+  VERSION = '0.0.2'
+end

data/lib/tagmemics/word.rb CHANGED

@@ -1,69 +1,59 @@
 require 'wordnet'
 require 'facets'
-require_relative './config'
+require_relative './load_data'
 require_relative './word/wordnet'
+require_relative './word/confidence'
-module Lexicon
+module Tagmemics
   class Word
-    include Config
-    ARTICLES = %w(the an a)
-    CONJUNCTIONS = %w(for and nor but or yet so )
-    PRONOUNS = Config.contents_to_a('pronouns')
-    def part_of_speech(constant, str)
-      arr = []
-      constant.each do |word|
-        regx = /\b#{word}\b/i
-        arr << word if regx =~ str # word phrase matches
-      end
-      arr
-    end
-    def decimal_complete(hsh)
-      total = hsh.length
-      complete = hsh.count { |_k, v| v } # not nil
-      complete / total.to_f
-    end
     def initialize(word)
-      @word = word
-      @confidence_levels = confidence_levels(word)
-    end
-    def confidence_levels(word)
-      {
-        :noun => noun_confidence(word),
-        :verb => verb_confidence(word),
-        :adjective => adjective_confidence(word),
-        :adverb => adverb_confidence(word),
-        :article => article_confidence(word),
-        :preposition => preposition_confidence(word),
-        :conjunction => conjunction_confidence(word)
-      }
-    end
-    def noun_confidence(str)
-      (WordNet.orig_probability(str) / 1) * 3
-    end
-    def verb_confidence(str)
-    end
-    def adjective_confidence(str)
-    end
+      @str = word
+      puts "examining: #{word}"
+      @tagmemic_confidence = Word.confidence_levels(word)
+    end
+    class << self
+      ##
+      # Because WordNet only tracks verbs, nouns, adverbs and adjectives,
+      # confidence levels can only be updated for those values.  The other words
+      #  such as pronouns, prepositions, and conjunctions are based off of list
+      #  in config folder.  Their score is pass or fail and is
+      #  calculated as 0 or 1.0.
+      def confidence_levels(word)
+        word = word.downcase
+        known_hsh = determine_known_words(word)
+        wordnet_hsh = determine_wordnet_words(word)
+        hsh = everything_nil(known_hsh) ? wordnet_hsh : known_hsh
+        delete_nogos(hsh)
+      end
-    def adverb_confidence(str)
-    end
+      def determine_known_words(word)
+        {
+          :article => article_confidence(word),
+          :preposition => preposition_confidence(word),
+          :pronoun => pronoun_confidence(word),
+          :conjunction => conjunction_confidence(word),
+          :linking_verb => linking_verb_confidence(word)
+        }
+      end
-    def article_confidence(str)
-    end
+      def everything_nil(hsh)
+        (hsh.select { |_k, v| v != 0.0 && !v.nil? }.empty?)
+      end
-    def preposition_confidence(str)
-    end
+      def delete_nogos(hsh)
+        hsh.delete_if { |_k, v| v == 0.0 || v.nil? }
+      end
-    def conjunction_confidence(str)
+      def determine_wordnet_words(word)
+        {
+          :noun => WordNetMethods.wordnet_probability(word, 'noun'),
+          :verb => WordNetMethods.wordnet_probability(word, 'verb'),
+          :adjective => WordNetMethods.wordnet_probability(word, 'adjective'),
+          :adverb => WordNetMethods.wordnet_probability(word, 'adverb')
+        }
+      end
     end
   end
 end