RubyGems - engtagger - Versions diffs - 0.1.2 → 0.2.0 - Mend

engtagger 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 47684458b8965c2c1f52d0d29ca2cad340b0ed3f
+  data.tar.gz: aa6dbd0473409e9b6b2987a42c126a6b10dce096
+SHA512:
+  metadata.gz: cddd67eab940146a2426032714aedd8e5195192ead3133ade5f76c594c5f0667f0747bba8b019f63df91dd2eb0610da19288a9e06a2323eb7bde91fec025b028
+  data.tar.gz: a76ca3422b9a3a1a813263b6e4ab5e69ca74ac998afcc259be55a6441b6b46b21ac9de6eb241327956a6fda1c2d3dbd033ba1678df86506f153089a3ef99d46d

data/README.md CHANGED

@@ -54,7 +54,16 @@ of regular expressions.
     proper = tgr.get_proper_nouns(tagged)
     #=> {"Alice"=>1}
+    # Get all past tense verbs
+    pt_verbs = tgr.get_past_tense_verbs(tagged)
+    #=> {"chased"=>1}
+    # Get all the adjectives
+    adj = tgr.get_adjectives(tagged)
+    #=> {"big"=>1, "fat"=>1}
     # Get all noun phrases of any syntactic level
     # (same as word_list but take a tagged input)
@@ -126,6 +135,11 @@ of this Ruby library
 * Yoichiro Hasebe (yohasebe [at] gmail.com)
+### Contributors
+* Carlos Ramirez III
+* Phil London
 ### Acknowledgement
 This Ruby library is a direct port of Lingua::EN::Tagger available at CPAN.

data/lib/engtagger.rb CHANGED

@@ -59,7 +59,6 @@ class EngTagger
   NUM   = get_ext('cd')
   GER   = get_ext('vbg')
   ADJ   = get_ext('jj[rs]*')
-  PART  = get_ext('vbn')
   NN    = get_ext('nn[sp]*')
   NNP   = get_ext('nnp')
   PREP  = get_ext('in')
@@ -68,6 +67,15 @@ class EngTagger
   QUOT  = get_ext('ppr')
   SEN   = get_ext('pp')
   WORD  = get_ext('\w+')
+  VB    = get_ext('vb')    # infinitive verbs (see get_infinitive_verbs)
+  VBG   = get_ext('vbg')   # gerund verbs; same get_ext argument as GER
+  VBD   = get_ext('vbd')   # past tense verbs (see get_past_tense_verbs)
+  PART  = get_ext('vbn')   # passive verbs (see get_passive_verbs)
+  VBP   = get_ext('vbp')   # base present verbs (see get_base_present_verbs)
+  VBZ   = get_ext('vbz')   # present verbs (see get_present_verbs)
+  JJ    = get_ext('jj')    # adjectives (see get_adjectives)
+  JJR   = get_ext('jjr')   # comparative adjectives
+  JJS   = get_ext('jjs')   # superlative adjectives
   # Convert a Treebank-style, abbreviated tag into verbose definitions
   def self.explain_tag(tag)
@@ -183,7 +191,7 @@ class EngTagger
     @conf[:debug] = false
     # assuming that we start analyzing from the beginning of a new sentence...
     @conf[:current_tag] = 'pp'
-    @conf.merge(params) if params
+    @conf.merge!(params)
     unless File.exists?(@conf[:word_path]) and File.exists?(@conf[:tag_path])
       print "Couldn't locate POS lexicon, creating a new one" if @conf[:debug]
       @@hmm = Hash.new
@@ -321,7 +329,143 @@ class EngTagger
     end
     return ret
   end
+  def get_infinitive_verbs(tagged)
+    return nil unless valid_text(tagged)
+    VB
+    trimmed = tagged.scan(VB).map do |n|
+      strip_tags(n)
+    end
+    ret = Hash.new(0)
+    trimmed.each do |n|
+      n = stem(n)
+      next unless n.length < 100  # sanity check on word length
+      ret[n] += 1 unless n =~ /\A\s*\z/
+    end
+    return ret
+  end
+  def get_past_tense_verbs(tagged)
+    return nil unless valid_text(tagged)
+    VBD
+    trimmed = tagged.scan(VBD).map do |n|
+      strip_tags(n)
+    end
+    ret = Hash.new(0)
+    trimmed.each do |n|
+      n = stem(n)
+      next unless n.length < 100  # sanity check on word length
+      ret[n] += 1 unless n =~ /\A\s*\z/
+    end
+    return ret
+  end
+  def get_gerund_verbs(tagged)
+    return nil unless valid_text(tagged)
+    VBG
+    trimmed = tagged.scan(VB).map do |n|
+      strip_tags(n)
+    end
+    ret = Hash.new(0)
+    trimmed.each do |n|
+      n = stem(n)
+      next unless n.length < 100  # sanity check on word length
+      ret[n] += 1 unless n =~ /\A\s*\z/
+    end
+    return ret
+  end
+  def get_passive_verbs(tagged)
+    return nil unless valid_text(tagged)
+    PART
+    trimmed = tagged.scan(PART).map do |n|
+      strip_tags(n)
+    end
+    ret = Hash.new(0)
+    trimmed.each do |n|
+      n = stem(n)
+      next unless n.length < 100  # sanity check on word length
+      ret[n] += 1 unless n =~ /\A\s*\z/
+    end
+    return ret
+  end
+  def get_base_present_verbs(tagged)
+    return nil unless valid_text(tagged)
+    VBP
+    trimmed = tagged.scan(VBP).map do |n|
+      strip_tags(n)
+    end
+    ret = Hash.new(0)
+    trimmed.each do |n|
+      n = stem(n)
+      next unless n.length < 100  # sanity check on word length
+      ret[n] += 1 unless n =~ /\A\s*\z/
+    end
+    return ret
+  end
+  def get_present_verbs(tagged)
+    return nil unless valid_text(tagged)
+    VBZ
+    trimmed = tagged.scan(VBZ).map do |n|
+      strip_tags(n)
+    end
+    ret = Hash.new(0)
+    trimmed.each do |n|
+      n = stem(n)
+      next unless n.length < 100  # sanity check on word length
+      ret[n] += 1 unless n =~ /\A\s*\z/
+    end
+    return ret
+  end
+  def get_adjectives(tagged)
+    return nil unless valid_text(tagged)
+    JJ
+    trimmed = tagged.scan(JJ).map do |n|
+      strip_tags(n)
+    end
+    ret = Hash.new(0)
+    trimmed.each do |n|
+      n = stem(n)
+      next unless n.length < 100  # sanity check on word length
+      ret[n] += 1 unless n =~ /\A\s*\z/
+    end
+    return ret
+  end
+  def get_comparative_adjectives(tagged)
+    return nil unless valid_text(tagged)
+    JJR
+    trimmed = tagged.scan(JJR).map do |n|
+      strip_tags(n)
+    end
+    ret = Hash.new(0)
+    trimmed.each do |n|
+      n = stem(n)
+      next unless n.length < 100  # sanity check on word length
+      ret[n] += 1 unless n =~ /\A\s*\z/
+    end
+    return ret
+  end
+  def get_superlative_adjectives(tagged)
+    return nil unless valid_text(tagged)
+    JJS
+    trimmed = tagged.scan(JJS).map do |n|
+      strip_tags(n)
+    end
+    ret = Hash.new(0)
+    trimmed.each do |n|
+      n = stem(n)
+      next unless n.length < 100  # sanity check on word length
+      ret[n] += 1 unless n =~ /\A\s*\z/
+    end
+    return ret
+  end
   # Given a POS-tagged text, this method returns only the maximal noun phrases.
   # May be called directly, but is also used by get_noun_phrases
   def get_max_noun_phrases(tagged)

data/lib/engtagger/pos_tags.hash CHANGED

Binary file

data/lib/engtagger/pos_words.hash CHANGED

Binary file

data/lib/engtagger/version.rb CHANGED

@@ -1,3 +1,3 @@
 module EngTagger
-  VERSION = "0.1.2"
+  VERSION = "0.2.0"
 end

data/test/test_engtagger.rb CHANGED

@@ -191,6 +191,11 @@ EOD
     text = ""
     assert(!@tagger.valid_text(text))
   end
+  def test_override_default_params
+    @tagger = EngTagger.new(:longest_noun_phrase => 3)
+    assert_equal 3, @tagger.conf[:longest_noun_phrase]
+  end
 end
 # Number of errors detected: 24

metadata CHANGED

@@ -1,15 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: engtagger
 version: !ruby/object:Gem::Version
-  version: 0.1.2
-  prerelease:
+  version: 0.2.0
 platform: ruby
 authors:
 - Yoichiro Hasebe
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-06-05 00:00:00.000000000 Z
+date: 2014-04-20 00:00:00.000000000 Z
 dependencies: []
 description: A Ruby port of Perl Lingua::EN::Tagger, a probability based, corpus-trained
   tagger that assigns POS tags to English text based on a lookup dictionary and a
@@ -20,7 +19,7 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
-- .gitignore
+- ".gitignore"
 - Gemfile
 - LICENSE
 - README.md
@@ -37,27 +36,26 @@ files:
 - test/test_engtagger.rb
 homepage: http://github.com/yohasebe/engtagger
 licenses: []
+metadata: {}
 post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
-  none: false
   requirements:
-  - - ! '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
-  none: false
   requirements:
-  - - ! '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.24
+rubygems_version: 2.2.2
 signing_key:
-specification_version: 3
+specification_version: 4
 summary: A probability based, corpus-trained English POS tagger
 test_files:
 - test/test_engtagger.rb