RubyGems - engtagger - Versions diffs - 0.3.2 → 0.4.0 - Mend

engtagger 0.3.2 → 0.4.0

This diff compares the contents of two publicly available package versions, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 6aa6da6cfb58bffd900843f62675d5895e80428be7295ae056ed73327286233d
-  data.tar.gz: dd412266b905ba4d378521540247a368bc4f73dfa89e8d6e58c220625c46e40d
+  metadata.gz: 0b61370e322595bd880097f51fe0728780fa6a01ee9975e6eb333c8720ff36d8
+  data.tar.gz: 0f990be4f4d5f71908d76f0fb52f2c925a2a01891a815cbc70eaf7a39f77edfe
 SHA512:
-  metadata.gz: de1aa006ea943270e4dcea78690e8a10551c42819abbf3c27b6d2629d600745124ec5cfa6a6104d3cb4c87dbfc14d09e643e7b2143979dee27485841fd76b0fe
-  data.tar.gz: 3404a699868beb475daee809cc67788a70152c0d5eba045b7d3c007e3b3fccb66ee6bb432832a8e9872cd6d3faf281fab60bf151c01eaf1cf52d6275644012bb
+  metadata.gz: ade5d1cf6fc11553519fe9217dffb06453e0ab7d69ab1532b3f2e2079dd05d035d90ce5ce92e4d0e1195f2a8f79df5b4d44c4cedb27f14df529ac0b0e91cf730
+  data.tar.gz: ff085546b0db152df0983dabea49ec5b0cf47525cca6118d3776378e908ea04fd675f0bb1daceb944d6be141615e3a5d9da5774025a0dc6ef609dd8b311b1412

data/.rubocop.yml ADDED Viewed

@@ -0,0 +1,75 @@
+AllCops:
+  NewCops: disable
+  SuggestExtensions: false
+  TargetRubyVersion: 2.6
+Documentation:
+  Enabled: false
+Naming/AccessorMethodName:
+  Enabled: false
+Naming/VariableNumber:
+  Enabled: false
+Naming/FileName:
+  Enabled: false
+Security/MarshalLoad:
+  Enabled: false
+Layout/EndOfLine:
+   Enabled: False
+Style/ClassVars:
+  Enabled: false
+Style/OptionalBooleanParameter:
+  Enabled: false
+Style/StringConcatenation:
+  Enabled: false
+Style/PerlBackrefs:
+  Enabled: false
+Style/StringLiterals:
+  Enabled: true
+  EnforcedStyle: double_quotes
+Style/StringLiteralsInInterpolation:
+  Enabled: true
+  EnforcedStyle: double_quotes
+Style/WordArray:
+  Enabled: false
+Style/EvalWithLocation:
+  Enabled: false
+Layout/LineLength:
+  Max: 400
+Metrics/MethodLength:
+  Max: 80
+Metrics/BlockLength:
+  Max: 60
+Metrics/AbcSize:
+  Max: 60
+Metrics/PerceivedComplexity:
+  Max: 60
+Metrics/ClassLength:
+  Max: 800
+Metrics/CyclomaticComplexity:
+  Max: 60
+Metrics/ParameterLists:
+  Max: 8
+Metrics/ModuleLength:
+  Max: 200

data/.solargraph.yml ADDED Viewed

@@ -0,0 +1,22 @@
+---
+include:
+- "**/*.rb"
+exclude:
+- spec/**/*
+- test/**/*
+- vendor/**/*
+- ".bundle/**/*"
+require: []
+domains: []
+reporters:
+- rubocop
+# - require_not_found
+formatter:
+  rubocop:
+    cops: safe
+    except: []
+    only: []
+    extra_args: []
+require_paths: []
+plugins: []
+max_files: 5000

data/Gemfile CHANGED Viewed

@@ -1,3 +1,7 @@
-source 'https://rubygems.org'
+# frozen_string_literal: true
-gem 'lru_redux'
+source "https://rubygems.org"
+gemspec
+gem "lru_redux"

data/README.md CHANGED Viewed

@@ -19,56 +19,58 @@ of regular expressions.
 * Extract noun phrases from tagged text
 * etc.
-### Synopsis:
+### Synopsis
-    require 'engtagger'
+```ruby
+require 'engtagger'
-    # Create a parser object
-    tgr = EngTagger.new
+# Create a parser object
+tgr = EngTagger.new
-    # Sample text
-    text = "Alice chased the big fat cat."
+# Sample text
+text = "Alice chased the big fat cat."
-    # Add part-of-speech tags to text
-    tagged = tgr.add_tags(text)
+# Add part-of-speech tags to text
+tagged = tgr.add_tags(text)
-    #=> "<nnp>Alice</nnp> <vbd>chased</vbd> <det>the</det> <jj>big</jj> <jj>fat</jj><nn>cat</nn> <pp>.</pp>"
+#=> "<nnp>Alice</nnp> <vbd>chased</vbd> <det>the</det> <jj>big</jj> <jj>fat</jj><nn>cat</nn> <pp>.</pp>"
-    # Get a list of all nouns and noun phrases with occurrence counts
-    word_list = tgr.get_words(text)
+# Get a list of all nouns and noun phrases with occurrence counts
+word_list = tgr.get_words(text)
-    #=> {"Alice"=>1, "cat"=>1, "fat cat"=>1, "big fat cat"=>1}
+#=> {"Alice"=>1, "cat"=>1, "fat cat"=>1, "big fat cat"=>1}
-    # Get a readable version of the tagged text
-    readable = tgr.get_readable(text)
+# Get a readable version of the tagged text
+readable = tgr.get_readable(text)
-    #=> "Alice/NNP chased/VBD the/DET big/JJ fat/JJ cat/NN ./PP"
+#=> "Alice/NNP chased/VBD the/DET big/JJ fat/JJ cat/NN ./PP"
-    # Get all nouns from a tagged output
-    nouns = tgr.get_nouns(tagged)
+# Get all nouns from a tagged output
+nouns = tgr.get_nouns(tagged)
-    #=> {"cat"=>1, "Alice"=>1}
+#=> {"cat"=>1, "Alice"=>1}
-    # Get all proper nouns
-    proper = tgr.get_proper_nouns(tagged)
+# Get all proper nouns
+proper = tgr.get_proper_nouns(tagged)
-    #=> {"Alice"=>1}
+#=> {"Alice"=>1}
-    # Get all past tense verbs
-    pt_verbs = tgr.get_past_tense_verbs(tagged)
+# Get all past tense verbs
+pt_verbs = tgr.get_past_tense_verbs(tagged)
-    #=> {"chased"=>1}
+#=> {"chased"=>1}
-    # Get all the adjectives
-    adj = tgr.get_adjectives(tagged)
+# Get all the adjectives
+adj = tgr.get_adjectives(tagged)
-    #=> {"big"=>1, "fat"=>1}
+#=> {"big"=>1, "fat"=>1}
-    # Get all noun phrases of any syntactic level
-    # (same as word_list but take a tagged input)
-    nps = tgr.get_noun_phrases(tagged)
+# Get all noun phrases of any syntactic level
+# (same as word_list but take a tagged input)
+nps = tgr.get_noun_phrases(tagged)
-    #=> {"Alice"=>1, "cat"=>1, "fat cat"=>1, "big fat cat"=>1}
+#=> {"Alice"=>1, "cat"=>1, "fat cat"=>1, "big fat cat"=>1}
+```
 ### Tag Set

data/Rakefile CHANGED Viewed

@@ -1,2 +1,10 @@
-#!/usr/bin/env rake
+# frozen_string_literal: true
 require "bundler/gem_tasks"
+require "rake/testtask"
+Rake::TestTask.new do |t|
+  t.libs << "test"
+  t.test_files = FileList["test/test*.rb"]
+  t.verbose = true
+end

data/engtagger.gemspec CHANGED Viewed

@@ -1,19 +1,22 @@
-# -*- encoding: utf-8 -*-
-require File.expand_path('../lib/engtagger/version', __FILE__)
+# frozen_string_literal: true
+require_relative "lib/engtagger/version"
 Gem::Specification.new do |gem|
   gem.authors       = ["Yoichiro Hasebe"]
   gem.email         = ["yohasebe@gmail.com"]
-  gem.summary         = %q{A probability based, corpus-trained English POS tagger}
-  gem.description     = %q{A Ruby port of Perl Lingua::EN::Tagger, a probability based, corpus-trained tagger that assigns POS tags to English text based on a lookup dictionary and a set of probability values.}
-  gem.homepage        = "http://github.com/yohasebe/engtagger"
-  gem.files         = `git ls-files`.split($\)
-  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  gem.summary       = "A probability based, corpus-trained English POS tagger"
+  gem.description   = "A Ruby port of Perl Lingua::EN::Tagger, a probability based, corpus-trained tagger that assigns POS tags to English text based on a lookup dictionary and a set of probability values."
+  gem.homepage      = "http://github.com/yohasebe/engtagger"
+  gem.license       = "GPL"
+  gem.required_ruby_version = Gem::Requirement.new(">= 2.6")
+  gem.files = Dir.chdir(File.expand_path(__dir__)) do
+    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
+  end
+  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
   gem.name          = "engtagger"
   gem.require_paths = ["lib"]
   gem.version       = EngTagger::VERSION
-  gem.add_runtime_dependency 'lru_redux'
+  gem.add_dependency "lru_redux"
 end

data/lib/engtagger/porter.rb CHANGED Viewed

@@ -1,23 +1,20 @@
-#!/usr/bin/env ruby
-# -*- coding: utf-8 -*-
+# frozen_string_literal: true
 module Stemmable
   STEP_2_LIST = {
-    'ational'=>'ate', 'tional'=>'tion', 'enci'=>'ence', 'anci'=>'ance',
-    'izer'=>'ize', 'bli'=>'ble',
-    'alli'=>'al', 'entli'=>'ent', 'eli'=>'e', 'ousli'=>'ous',
-    'ization'=>'ize', 'ation'=>'ate',
-    'ator'=>'ate', 'alism'=>'al', 'iveness'=>'ive', 'fulness'=>'ful',
-    'ousness'=>'ous', 'aliti'=>'al',
-    'iviti'=>'ive', 'biliti'=>'ble', 'logi'=>'log'
-  }
+    "ational" => "ate", "tional" => "tion", "enci" => "ence", "anci" => "ance",
+    "izer" => "ize", "bli" => "ble",
+    "alli" => "al", "entli" => "ent", "eli" => "e", "ousli" => "ous",
+    "ization" => "ize", "ation" => "ate",
+    "ator" => "ate", "alism" => "al", "iveness" => "ive", "fulness" => "ful",
+    "ousness" => "ous", "aliti" => "al",
+    "iviti" => "ive", "biliti" => "ble", "logi" => "log"
+  }.freeze
   STEP_3_LIST = {
-    'icate'=>'ic', 'ative'=>'', 'alize'=>'al', 'iciti'=>'ic',
-    'ical'=>'ic', 'ful'=>'', 'ness'=>''
-  }
+    "icate" => "ic", "ative" => "", "alize" => "al", "iciti" => "ic",
+    "ical" => "ic", "ful" => "", "ness" => ""
+  }.freeze
   SUFFIX_1_REGEXP = /(
                     ational  |
@@ -40,7 +37,7 @@ module Stemmable
                     aliti    |
                     iviti    |
                     biliti   |
-                    logi)$/x
+                    logi)$/x.freeze
   SUFFIX_2_REGEXP = /(
@@ -61,20 +58,18 @@ module Stemmable
                       iti      |
                       ous      |
                       ive      |
-                      ize)$/x
+                      ize)$/x.freeze
-  C = "[^aeiou]"         # consonant
-  V = "[aeiouy]"         # vowel
-  CC = "#{C}(?>[^aeiouy]*)"  # consonant sequence
-  VV = "#{V}(?>[aeiou]*)"    # vowel sequence
+  C = "[^aeiou]" # consonant
+  V = "[aeiouy]" # vowel
+  CC = "#{C}(?>[^aeiouy]*)" # consonant sequence
+  VV = "#{V}(?>[aeiou]*)"   # vowel sequence
-  MGR0 = /^(#{CC})?#{VV}#{CC}/o                # [cc]vvcc... is m>0
-  MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o       # [cc]vvcc[vv] is m=1
-  MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o      # [cc]vvccvvcc... is m>1
-  VOWEL_IN_STEM   = /^(#{CC})?#{V}/o                      # vowel in stem
+  MGR0 = /^(#{CC})?#{VV}#{CC}/o.freeze # [cc]vvcc... is m>0
+  MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o.freeze # [cc]vvcc[vv] is m=1
+  MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o.freeze # [cc]vvccvvcc... is m>1
+  VOWEL_IN_STEM = /^(#{CC})?#{V}/o.freeze # vowel in stem
-  #
   # Porter stemmer in Ruby.
   #
   # This is the Porter stemming algorithm, ported to Ruby from the
@@ -90,30 +85,31 @@ module Stemmable
   #
   def stem_porter
     # make a copy of the given object and convert it to a string.
-    w = self.dup.to_str
+    w = dup.to_str
     return w if w.length < 3
     # now map initial y to Y so that the patterns never treat it as vowel
-    w[0] = 'Y' if w[0] == ?y
+    w[0] = "Y" if w[0] == "y"
     # Step 1a
-    if w =~ /(ss|i)es$/
+    case w
+    when /(ss|i)es$/
       w = $` + $1
-    elsif w =~ /([^s])s$/
+    when /([^s])s$/
       w = $` + $1
     end
     # Step 1b
-    if w =~ /eed$/
+    case w
+    when /eed$/
       w.chop! if $` =~ MGR0
-    elsif w =~ /(ed|ing)$/
+    when /(ed|ing)$/
       stem = $`
       if stem =~ VOWEL_IN_STEM
         w = stem
-	case w
+        case w
         when /(at|bl|iz)$/             then w << "e"
         when /([^aeiouylsz])\1$/       then w.chop!
         when /^#{CC}#{V}[^aeiouwxy]$/o then w << "e"
@@ -131,59 +127,41 @@ module Stemmable
       stem = $`
       suffix = $1
       # print "stem= " + stem + "\n" + "suffix=" + suffix + "\n"
-      if stem =~ MGR0
-        w = stem + STEP_2_LIST[suffix]
-      end
+      w = stem + STEP_2_LIST[suffix] if stem =~ MGR0
     end
     # Step 3
     if w =~ /(icate|ative|alize|iciti|ical|ful|ness)$/
       stem = $`
       suffix = $1
-      if stem =~ MGR0
-        w = stem + STEP_3_LIST[suffix]
-      end
+      w = stem + STEP_3_LIST[suffix] if stem =~ MGR0
     end
     # Step 4
     if w =~ SUFFIX_2_REGEXP
       stem = $`
-      if stem =~ MGR1
-        w = stem
-      end
+      w = stem if stem =~ MGR1
     elsif w =~ /(s|t)(ion)$/
       stem = $` + $1
-      if stem =~ MGR1
-        w = stem
-      end
+      w = stem if stem =~ MGR1
     end
     #  Step 5
     if w =~ /e$/
       stem = $`
-      if (stem =~ MGR1) ||
-          (stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
-        w = stem
-      end
+      w = stem if (stem =~ MGR1) || (stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
     end
-    if w =~ /ll$/ && w =~ MGR1
-      w.chop!
-    end
+    w.chop! if w =~ /ll$/ && w =~ MGR1
     # and turn initial Y back to y
-    w[0] = 'y' if w[0] == ?Y
+    w[0] = "y" if w[0] == "Y"
     w
   end
-  #
   # make the stem_porter the default stem method, just in case we
   # feel like having multiple stemmers available later.
-  #
   alias stem stem_porter
 end
 # Add stem method to all Strings

data/lib/engtagger/version.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 class EngTagger
-  VERSION = "0.3.2"
+  VERSION = "0.4.0"
 end