RubyGems - treat - Versions diffs - 0.1.1 - Mend

treat 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (147) hide show

data/INSTALL +0 -0
data/LICENSE +28 -0
data/README +0 -0
data/TODO +67 -0
data/bin/INFO +1 -0
data/examples/benchmark.rb +81 -0
data/examples/keywords.rb +60 -0
data/examples/texts/bugged_out.txt +26 -0
data/examples/texts/half_cocked_basel.txt +16 -0
data/examples/texts/hedge_funds.txt +24 -0
data/examples/texts/hose_and_dry.txt +19 -0
data/examples/texts/hungarys_troubles.txt +46 -0
data/examples/texts/indias_slowdown.txt +15 -0
data/examples/texts/merkozy_rides_again.txt +24 -0
data/examples/texts/prada_is_not_walmart.txt +9 -0
data/examples/texts/republican_nomination.txt +26 -0
data/examples/texts/to_infinity_and_beyond.txt +15 -0
data/lib/treat.rb +91 -0
data/lib/treat/buildable.rb +115 -0
data/lib/treat/categories.rb +29 -0
data/lib/treat/category.rb +28 -0
data/lib/treat/delegatable.rb +90 -0
data/lib/treat/detectors.rb +28 -0
data/lib/treat/detectors/encoding/native.rb +12 -0
data/lib/treat/detectors/encoding/r_chardet19.rb +24 -0
data/lib/treat/detectors/format/file.rb +36 -0
data/lib/treat/detectors/language/language_detector.rb +19 -0
data/lib/treat/detectors/language/what_language.rb +29 -0
data/lib/treat/entities.rb +52 -0
data/lib/treat/entities/collection.rb +19 -0
data/lib/treat/entities/constituents.rb +15 -0
data/lib/treat/entities/document.rb +11 -0
data/lib/treat/entities/entity.rb +242 -0
data/lib/treat/entities/sentence.rb +8 -0
data/lib/treat/entities/text.rb +7 -0
data/lib/treat/entities/tokens.rb +37 -0
data/lib/treat/entities/zones.rb +17 -0
data/lib/treat/exception.rb +5 -0
data/lib/treat/extractors.rb +41 -0
data/lib/treat/extractors/key_sentences/topics_frequency.rb +49 -0
data/lib/treat/extractors/named_entity/abner.rb +20 -0
data/lib/treat/extractors/named_entity/stanford.rb +174 -0
data/lib/treat/extractors/statistics/frequency.rb +22 -0
data/lib/treat/extractors/statistics/frequency_of.rb +17 -0
data/lib/treat/extractors/statistics/position_in.rb +13 -0
data/lib/treat/extractors/statistics/transition_matrix.rb +105 -0
data/lib/treat/extractors/statistics/transition_probability.rb +53 -0
data/lib/treat/extractors/time/chronic.rb +12 -0
data/lib/treat/extractors/time/native.rb +12 -0
data/lib/treat/extractors/time/nickel.rb +45 -0
data/lib/treat/extractors/topic_words/lda.rb +71 -0
data/lib/treat/extractors/topic_words/lda/data.dat +46 -0
data/lib/treat/extractors/topic_words/lda/wiki.yml +121 -0
data/lib/treat/extractors/topics/reuters.rb +91 -0
data/lib/treat/extractors/topics/reuters/industry.xml +2717 -0
data/lib/treat/extractors/topics/reuters/region.xml +13585 -0
data/lib/treat/extractors/topics/reuters/topics.xml +17977 -0
data/lib/treat/feature.rb +53 -0
data/lib/treat/formatters.rb +44 -0
data/lib/treat/formatters/cleaners/html.rb +17 -0
data/lib/treat/formatters/readers/autoselect.rb +35 -0
data/lib/treat/formatters/readers/gocr.rb +24 -0
data/lib/treat/formatters/readers/html.rb +13 -0
data/lib/treat/formatters/readers/ocropus.rb +31 -0
data/lib/treat/formatters/readers/pdf.rb +17 -0
data/lib/treat/formatters/readers/txt.rb +15 -0
data/lib/treat/formatters/serializers/xml.rb +48 -0
data/lib/treat/formatters/serializers/yaml.rb +15 -0
data/lib/treat/formatters/serializers/yaml/helper.rb +96 -0
data/lib/treat/formatters/unserializers/autoselect.rb +19 -0
data/lib/treat/formatters/unserializers/xml.rb +79 -0
data/lib/treat/formatters/unserializers/yaml.rb +15 -0
data/lib/treat/formatters/visualizers/dot.rb +73 -0
data/lib/treat/formatters/visualizers/html.rb +12 -0
data/lib/treat/formatters/visualizers/inspect.rb +16 -0
data/lib/treat/formatters/visualizers/short_value.rb +14 -0
data/lib/treat/formatters/visualizers/standoff.rb +41 -0
data/lib/treat/formatters/visualizers/tree.rb +28 -0
data/lib/treat/formatters/visualizers/txt.rb +31 -0
data/lib/treat/group.rb +96 -0
data/lib/treat/inflectors.rb +50 -0
data/lib/treat/inflectors/cardinal_words/linguistics.rb +45 -0
data/lib/treat/inflectors/conjugators/linguistics.rb +30 -0
data/lib/treat/inflectors/declensors/en.rb +18 -0
data/lib/treat/inflectors/declensors/linguistics.rb +30 -0
data/lib/treat/inflectors/lemmatizers/e_lemma.rb +12 -0
data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +213 -0
data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +68 -0
data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +6 -0
data/lib/treat/inflectors/ordinal_words/linguistics.rb +21 -0
data/lib/treat/inflectors/stemmers/porter.rb +158 -0
data/lib/treat/inflectors/stemmers/porter_c.rb +23 -0
data/lib/treat/inflectors/stemmers/uea.rb +30 -0
data/lib/treat/lexicalizers.rb +49 -0
data/lib/treat/lexicalizers/category/from_tag.rb +30 -0
data/lib/treat/lexicalizers/linkages/naive.rb +63 -0
data/lib/treat/lexicalizers/synsets/rita_wn.rb +23 -0
data/lib/treat/lexicalizers/synsets/wordnet.rb +72 -0
data/lib/treat/lexicalizers/tag/brill.rb +101 -0
data/lib/treat/lexicalizers/tag/lingua.rb +114 -0
data/lib/treat/lexicalizers/tag/stanford.rb +86 -0
data/lib/treat/processors.rb +45 -0
data/lib/treat/processors/chunkers/txt.rb +27 -0
data/lib/treat/processors/parsers/enju.rb +214 -0
data/lib/treat/processors/parsers/stanford.rb +60 -0
data/lib/treat/processors/segmenters/punkt.rb +48 -0
data/lib/treat/processors/segmenters/stanford.rb +45 -0
data/lib/treat/processors/segmenters/tactful.rb +34 -0
data/lib/treat/processors/tokenizers/macintyre.rb +76 -0
data/lib/treat/processors/tokenizers/multilingual.rb +31 -0
data/lib/treat/processors/tokenizers/perl.rb +96 -0
data/lib/treat/processors/tokenizers/punkt.rb +42 -0
data/lib/treat/processors/tokenizers/stanford.rb +33 -0
data/lib/treat/processors/tokenizers/tactful.rb +59 -0
data/lib/treat/proxies.rb +66 -0
data/lib/treat/registrable.rb +26 -0
data/lib/treat/resources.rb +10 -0
data/lib/treat/resources/categories.rb +18 -0
data/lib/treat/resources/delegates.rb +96 -0
data/lib/treat/resources/dependencies.rb +0 -0
data/lib/treat/resources/edges.rb +8 -0
data/lib/treat/resources/formats.rb +23 -0
data/lib/treat/resources/languages.rb +86 -0
data/lib/treat/resources/languages.txt +504 -0
data/lib/treat/resources/tags.rb +393 -0
data/lib/treat/sugar.rb +43 -0
data/lib/treat/tree.rb +174 -0
data/lib/treat/utilities.rb +127 -0
data/lib/treat/visitable.rb +27 -0
data/test/profile.rb +2 -0
data/test/tc_detectors.rb +27 -0
data/test/tc_entity.rb +105 -0
data/test/tc_extractors.rb +48 -0
data/test/tc_formatters.rb +46 -0
data/test/tc_inflectors.rb +39 -0
data/test/tc_lexicalizers.rb +39 -0
data/test/tc_processors.rb +36 -0
data/test/tc_resources.rb +27 -0
data/test/tc_treat.rb +64 -0
data/test/tc_tree.rb +60 -0
data/test/tests.rb +19 -0
data/test/texts.rb +20 -0
data/test/texts/english/long.html +24 -0
data/test/texts/english/long.txt +22 -0
data/test/texts/english/medium.txt +5 -0
data/test/texts/english/short.txt +3 -0
metadata +412 -0

data/lib/treat/inflectors/stemmers/porter_c.rb ADDED

@@ -0,0 +1,23 @@
+module Treat
+  module Inflectors
+    module Stemmers
+      # Stems words using the 'ruby-stemmer' gem, which
+      # wraps a C version of the Porter stemming algorithm.
+      #
+      # Project website: https://github.com/aurelian/ruby-stemmer
+      # Original paper: Porter, 1980. An algorithm for suffix stripping,
+      # Program, Vol. 14, no. 3, pp 130-137,
+      # Original C implementation: http://www.tartarus.org/~martin/PorterStemmer.
+      class PorterC
+        silently { require 'lingua/stemmer' }
+        ::LinguaStemmer = ::Lingua
+        Object.instance_eval { remove_const :Lingua }
+        # Stem the word using the Porter C algorithm.
+        # Options: none.
+        def self.stem(word, options = {})
+          silently { ::LinguaStemmer.stemmer(word.to_s) }
+        end
+      end
+    end
+  end
+end

data/lib/treat/inflectors/stemmers/uea.rb ADDED

@@ -0,0 +1,30 @@
+module Treat
+  module Inflectors
+    module Stemmers
+      # Stems a word using the UEA algorithm, implemented
+      # by the 'uea-stemmer' gem.
+      #
+      # "Similar to other stemmers, UEA-Lite operates on a
+      # set of rules which are used as steps. There are two
+      # groups of rules: the first to clean the tokens, and
+      # the second to alter suffixes."
+      #
+      #   Project website: https://github.com/ealdent/uea-stemmer
+      #   Original paper: Jenkins, Marie-Claire, Smith, Dan,
+      #   Conservative stemming for search and indexing, 2005.
+      #   http://www.uea.ac.uk/polopoly_fs/1.85493!stemmer25feb.pdf
+      class UEA
+        # Require the 'uea-stemmer' gem.
+        silently { require 'uea-stemmer' }
+        # Keep only one copy of the stemmer.
+        @@stemmer = nil
+        # Stems a word using the UEA algorithm, implemented
+        # by the 'uea-stemmer' gem.
+        def self.stem(entity, options = {})
+          @@stemmer ||= silently { ::UEAStemmer.new }
+          @@stemmer.stem(entity.to_s).strip
+        end
+      end
+    end
+  end
+end

data/lib/treat/lexicalizers.rb ADDED

@@ -0,0 +1,49 @@
+module Treat
+  # Lexicalizers allow the retrieval of lexical information
+  # (part of speech tag, synsets, hypersets, hyposets, etc.)
+  # of an entity.
+  module Lexicalizers
+    # Taggers return the part of speech tag of a word.
+    module Tag
+      extend Group
+      self.type = :annotator
+      self.targets = [:phrase, :word]
+    end
+    module Category
+      extend Group
+      self.type = :annotator
+      self.targets = [:phrase, :word]
+      def self.cat(entity, category); category; end # Remove
+    end
+    # Linkers allow to retrieve grammatical links
+    # between words.
+    module Linkages
+      extend Group
+      self.type = :annotator
+      self.targets = [:sentence, :word]
+    end
+    # Lexicons are dictionnaries of semantically linked
+    # word forms.
+    module Synsets
+      extend Group
+      self.type = :annotator
+      self.targets = [:word, :number]
+      def self.synonyms(entity, synsets)
+        synsets.collect { |ss| ss.synonyms }.flatten - [entity.value]
+      end
+      def self.antonyms(entity, synsets)
+        synsets.collect { |ss| ss.antonyms }.flatten
+      end
+      def self.hyponyms(entity, synsets)
+        synsets.collect { |ss| ss.hyponyms }.flatten
+      end
+      def self.hypernyms(entity, synsets)
+        synsets.collect { |ss| ss.hypernyms }.flatten
+      end
+    end
+    extend Treat::Category
+  end
+end

data/lib/treat/lexicalizers/category/from_tag.rb ADDED

@@ -0,0 +1,30 @@
+module Treat
+  module Lexicalizers
+    module Category
+      # A class that detects the category of a word from its tag,
+      # using the default tagger for the language of the entity.
+      class FromTag
+        # Find the category of the current entity.
+        # Options:
+        # :tagger => (Symbol) force the use of a tagger.
+        # :tag_to_cat => (Hash) a list of categories for each possible tag.
+        def self.category(entity, options = {})
+          if options.empty?
+            options = {
+              tagger: nil,
+              tag_to_cat: Treat::Resources::Tags::PTBWordTagToCategory
+            }
+          end
+          tag = options[:tagger].nil? ? entity.tag : entity.tag(options[:tagger])
+          cat = options[:tag_to_cat][tag]
+          if cat.nil?
+            warn "Category not found for tag #{tag}."
+            :unknown
+          else
+            cat
+          end
+        end
+      end
+    end
+  end
+end

data/lib/treat/lexicalizers/linkages/naive.rb ADDED

@@ -0,0 +1,63 @@
+module Treat
+  module Lexicalizers
+    module Linkages
+      class Naive
+        def self.linkages(entity, options = {})
+          linkage = options.delete(:linkage)
+          if linkage.nil?
+            raise Treat::Exception,
+            "You must supply the :linkage option."
+          end
+          if !respond_to?(linkage)
+            raise Treat::Exception,
+            "No handler to resolve linkage #{linkage}."
+          end
+          self.send(linkage, entity, options)
+        end
+        # %%%
+        def self.patient(entity, options)
+          # Not so simple here...                 Fix
+          if main_verb.has_feature?(:aux)
+            subject
+          elsif main_verb.voice == 'passive'
+            subject
+          elsif main_verb.voice == 'active'
+            # Each prepos.
+          end
+        end
+        # Return the subject of the sentence|verb.
+        def self.subject(entity, options)
+          verb = entity.category == :verb ?
+          main_verb(entity) : entity.main_verb
+          args = []
+          main_verb.edges.each_pair do |id,edge|
+            args << find(id)
+          end
+          args[0]
+        end
+        # Return the object of the sentence|verb.
+        def self.object(entity, options)
+          verb = entity.category == :verb ?
+          main_verb(entity) : entity.main_verb
+          if verb.voice == 'passive'
+            return
+          end
+          args = []
+          verb.edges.each_pair do |id,edge|
+            args << find(id)
+          end
+          args[1]
+        end
+        # Find the main verb (shallowest verb in the tree).
+        def self.main_verb(entity, options)
+          verbs = entity.words_with_cat(:verb)
+          if verbs.empty?
+            return
+          end
+          verbs.sort! { |a,b| a.depth <=> b.depth }
+          verbs[0]
+        end
+      end
+    end
+  end
+end

data/lib/treat/lexicalizers/synsets/rita_wn.rb ADDED

@@ -0,0 +1,23 @@
+module Treat
+  module Lexicalizers
+    module Synsets
+      # Currently not implemented: the class body loads the jars and imports the Java classes, but synsets has an empty body and returns nil.
+      class RitaWn
+        # Require the Ruby-Java bridge (the enclosing `silently do ... end` wrapper is commented out).
+        #silently do
+          require 'rjb'
+          # Load the JWNL and RitaWN jars from under Treat.bin and import the Java classes JWNLException and RiWordnet.
+          Rjb::load("#{Treat.bin}/jwnl/jwnl.jar", [])
+          JWNLException = Rjb::import('net.didion.jwnl.JWNLException')
+          Rjb::load("#{Treat.bin}/ritaWN/library/ritaWN.jar", [])
+          Rjb::add_jar("#{Treat.bin}/ritaWN/library/supportWN.jar")
+          Rjb::add_jar("#{Treat.bin}/ritaWNcore1.0.jar")
+          RiWordnet = ::Rjb::import('rita.wordnet.RiWordnet')
+        #end
+        def self.synsets(word, options = nil)
+        end
+      end
+    end
+  end
+end

data/lib/treat/lexicalizers/synsets/wordnet.rb ADDED

@@ -0,0 +1,72 @@
+module Treat
+  module Lexicalizers
+    module Synsets
+      # Obtain lexical information about a word using the
+      # ruby 'wordnet' gem.
+      class Wordnet
+        # Require the 'wordnet' gem.
+        require 'wordnet'
+        # Obtain lexical information about a word using the
+        # ruby 'wordnet' gem.
+        def self.synsets(word, options = nil)
+          unless [:noun, :adjective, :verb].include?(word.category)
+            return []
+          end
+          cat = word.category.to_s.capitalize
+          index = ::WordNet.const_get(cat + 'Index').instance
+          lemma = index.find(word.value.downcase)
+          return [] if lemma.nil?
+          synsets = []
+          lemma.synsets.each { |synset| synsets << Synset.new(synset) }
+          synsets
+        end
+      end
+    end
+    # An adaptor for synsets used by the Wordnet gem.
+    class Synset
+      # The POS tag of the word.
+      attr_accessor :pos
+      # The definition of the synset.
+      attr_accessor :definition
+      # The examples in the synset.
+      attr_accessor :examples
+      def initialize(synset)
+        @original_synset = synset
+        @pos, @definition, @examples =
+        parse_synset(synset.to_s.split(')'))
+      end
+      def parse_synset(res)
+        pos = res[0][1..-1].strip
+        res2 = res[1].split('(')
+        res3 = res2[1].split(';')
+        1.upto(res3.size-1) do |i|
+          res3[i] = res3[i].strip[1..-2]
+        end
+        definition = res3[0]
+        examples = res3[1..-1]
+        return pos, definition, examples
+      end
+      # The words in the synset.
+      def words; @original_synset.words; end
+      def synonyms; @original_synset.words; end
+      # A gloss (short definition with examples)
+      # for the synset.
+      def gloss; @original_synset.gloss; end
+      # The antonym sets of the synset.
+      def antonyms; antonym.collect { |a| a.words }; end
+      # The hypernym sets of the synset.
+      def hypernyms; hypernym.words; end
+      # The hyponym sets of the synset.
+      def hyponyms; hyponym.collect { |h| h.words }; end
+      # Respond to the missing method event.
+      def method_missing(sym, *args, &block)
+        ret = @original_synset.send(sym)
+        if ret.is_a?(::WordNet::Synset)
+          Synset.new(ret)
+        else
+          ret
+        end
+      end
+    end
+  end
+end

data/lib/treat/lexicalizers/tag/brill.rb ADDED

@@ -0,0 +1,101 @@
+module Treat
+  module Lexicalizers
+    module Tag
+      # Adapter class for the 'rbtagger' gem, a port
+      # of the Perl Lingua::BrillTagger class, based
+      # on the rule-based tagger developed by Eric Brill.
+      #
+      # The Brill tagger is a simple rule-based part of
+      # speech tagger. The main advantages over stochastic
+      # taggers are a vast reduction in information required
+      # and better portability from one tag set, corpus genre
+      # or language to another.
+      #
+      # Original paper:
+      # Eric Brill. 1992. A simple rule-based part of speech tagger.
+      # In Proceedings of the third conference on Applied natural
+      # language processing (ANLC '92). Association for Computational
+      # Linguistics, Stroudsburg, PA, USA, 152-155.
+      # DOI=10.3115/974499.974526 http://dx.doi.org/10.3115/974499.974526
+      # Project website:
+      # http://rbtagger.rubyforge.org/
+      # Original Perl module site:
+      # http://search.cpan.org/~kwilliams/Lingua-BrillTagger-0.02/lib/Lingua/BrillTagger.pm
+      class Brill
+        patch = false
+        # Require the 'rbtagger' gem.
+        begin
+          silently { require 'rbtagger' }
+        # This whole mess is required to deal with
+        # the fact that the 'rbtagger' gem defines
+        # a top-level module called 'Word', which
+        # will clash with the top-level class 'Word'
+        # we define when syntactic sugar is enabled.
+        rescue TypeError
+          if Treat.edulcorated?
+            patch = true
+            # Unset the class Word for the duration
+            # of loading the tagger.
+            Object.const_unset(:Word); retry
+          else
+            raise Treat::Exception,
+            'Something went wrong due to a name clash with the "rbtagger" gem.' +
+            'Turn off syntactic sugar to resolve this problem.'
+          end
+        ensure
+          # Reset the class Word if using syntactic sugar.
+          if Treat.edulcorated? && patch
+            Object.const_set(:Word, Treat::Entities::Word)
+          end
+        end
+        # Hold the tagger.
+        @@tagger = nil
+        # Hold the user-set options
+        @@options = {}
+        # Hold the default options.
+        DefaultOptions =  {
+          lexicon: nil,
+          lexical_rules: nil,
+          contextual_rules: nil
+        }
+        # Tag words using a native Brill tagger.
+        #
+        # Available options:
+        # :lexicon => String (Lexicon file to use)
+        # :lexical_rules => String (Lexical rule file to use)
+        # :contextual_rules => String (Contextual rules file to use)
+        def self.tag(entity, options = {})
+          # Reinitialize the tagger if the options have changed.
+          if options != @@options
+            @@options = DefaultOptions.merge(options)
+            @@tagger = nil # Reset the tagger
+          end
+          # Create the tagger if necessary
+          @@tagger ||= ::Brill::Tagger.new(options[:lexicon],
+          options[:lexical_rules], options[:contextual_rules])
+          # Perform tagging.
+          if entity.type == :word
+            # Setup the context of the word
+            l = entity.left
+            r = entity.right
+            l = l.nil? ? '' : l.to_s
+            r = r.nil? ? '' : r.to_s
+            c = "#{l} #{entity.value} #{r}"
+          end
+          res = @@tagger.tag(c)
+          if l == ''
+            unless r == ''
+              entity.next_sibling.set(:tag, res[3][1])
+            end
+            return res[2][1]
+          else
+            unless r == ''
+              entity.next_sibling.set(:tag, res[2][1])
+            end
+            return res[1][1]
+          end
+        end
+      end
+    end
+  end
+end

data/lib/treat/lexicalizers/tag/lingua.rb ADDED

@@ -0,0 +1,114 @@
+module Treat
+  module Lexicalizers
+    module Tag
+      # An adapter for the 'engtagger' gem, which
+      # is a port of the Perl Lingua::EN::Tagger module.
+      #
+      # "This module uses part-of-speech statistics from
+      # the Penn Treebank to assign POS tags to English text.
+      # The tagger applies a bigram (two-word) Hidden Markov
+      # Model to guess the appropriate POS tag for a word.
+      # That means that the tagger will try to assign a POS
+      # tag based on the known POS tags for a given word and
+      # the POS tag assigned to its predecessor."
+      #
+      # Project website: http://engtagger.rubyforge.org/
+      # Original Perl module site:
+      # http://cpansearch.perl.org/src/ACOBURN/Lingua-EN-Tagger-0.15/
+      class Lingua
+        # Require the 'engtagger' gem.
+        silently { require 'engtagger' }
+        # Hold the tagger.
+        @@tagger = nil
+        # Hold the user-set options
+        @@options = {}
+        # Hold the default options.
+        DefaultOptions =  {
+          unknown_word_tag: '?',
+          relax: false,
+          debug: false
+        }
+        # Tag the word using a probabilistic model taking
+        # into account known words found in a lexicon and
+        # the tag of the previous word.
+        #
+        # Options:
+        #
+        #   :relax => (Boolean) Relax the Hidden Markov Model:
+        #   this may improve accuracy for uncommon words,
+        #   particularly words used polysemously.
+        #   :debug => (Boolean) Print debug messages.
+        #   :unknown_word_tag => (String) Tag for unknown words.
+        def self.tag(entity, options = {})
+          # Reinitialize the tagger if the options have changed.
+          if options != @@options
+            @@options = DefaultOptions.merge(options)
+            @@tagger = nil # Reset the tagger
+          end
+          @@tagger ||= ::EngTagger.new(@@options)
+          left = entity.left
+          if left.nil? || left.type != :word
+            left_tag = 'pp'
+          else
+            left_tag = left.tag.downcase
+            left_tag = 'pp' if left_tag == ''
+          end
+          w = @@tagger.clean_word(entity.to_s)
+          t = @@tagger.conf[:current_tag] =
+          @@tagger.assign_tag(left_tag, w)
+          t.upcase
+        end
+      end
+    end
+  end
+end
+=begin
+CC      Conjunction, coordinating               and, or
+CD      Adjective, cardinal number              3, fifteen
+DET     Determiner                              this, each, some
+EX      Pronoun, existential there              there
+FW      Foreign words
+IN      Preposition / Conjunction               for, of, although, that
+JJ      Adjective                               happy, bad
+JJR     Adjective, comparative                  happier, worse
+JJS     Adjective, superlative                  happiest, worst
+LS      Symbol, list item                       A, A.
+MD      Verb, modal                             can, could, 'll
+NN      Noun                                    aircraft, data
+NNP     Noun, proper                            London, Michael
+NNPS    Noun, proper, plural                    Australians, Methodists
+NNS     Noun, plural                            women, books
+PDT     Determiner, prequalifier                quite, all, half
+POS     Possessive                              's, '
+PRP     Determiner, possessive second           mine, yours
+PRPS    Determiner, possessive                  their, your
+RB      Adverb                                  often, not, very, here
+RBR     Adverb, comparative                     faster
+RBS     Adverb, superlative                     fastest
+RP      Adverb, particle                        up, off, out
+SYM     Symbol                                  *
+TO      Preposition                             to
+UH      Interjection                            oh, yes, mmm
+VB      Verb, infinitive                        take, live
+VBD     Verb, past tense                        took, lived
+VBG     Verb, gerund                            taking, living
+VBN     Verb, past/passive participle           taken, lived
+VBP     Verb, base present form                 take, live
+VBZ     Verb, present 3SG -s form               takes, lives
+WDT     Determiner, question                    which, whatever
+WP      Pronoun, question                       who, whoever
+WPS     Determiner, possessive & question       whose
+WRB     Adverb, question                        when, how, however
+PP      Punctuation, sentence ender             ., !, ?
+PPC     Punctuation, comma                      ,
+PPD     Punctuation, dollar sign                $
+PPL     Punctuation, quotation mark left        ``
+PPR     Punctuation, quotation mark right       ''
+PPS     Punctuation, colon, semicolon, ellipsis :, ..., -
+LRB     Punctuation, left bracket               (, {, [
+RRB     Punctuation, right bracket              ), }, ]
+=end