RubyGems - did_you_mean - Versions diffs - 1.3.1 → 1.4.0 - Mend

did_you_mean 1.3.1 → 1.4.0

Files changed (52) hide show

checksums.yaml +4 -4
data/.github/workflows/ruby.yml +48 -0
data/CHANGELOG.md +75 -75
data/Gemfile +2 -1
data/README.md +2 -32
data/Rakefile +4 -5
data/appveyor.yml +25 -0
data/did_you_mean.gemspec +6 -4
data/documentation/CHANGELOG.md.erb +8 -0
data/documentation/changelog_generator.rb +34 -0
data/documentation/human_typo_api.md +20 -0
data/documentation/tree_spell_algorithm.md +82 -0
data/documentation/tree_spell_checker_api.md +24 -0
data/lib/did_you_mean.rb +17 -16
data/lib/did_you_mean/experimental.rb +2 -2
data/lib/did_you_mean/experimental/initializer_name_correction.rb +1 -1
data/lib/did_you_mean/experimental/ivar_name_correction.rb +3 -1
data/lib/did_you_mean/levenshtein.rb +1 -1
data/lib/did_you_mean/spell_checker.rb +7 -7
data/lib/did_you_mean/spell_checkers/key_error_checker.rb +8 -2
data/lib/did_you_mean/spell_checkers/method_name_checker.rb +14 -6
data/lib/did_you_mean/spell_checkers/name_error_checkers.rb +2 -2
data/lib/did_you_mean/spell_checkers/name_error_checkers/class_name_checker.rb +5 -5
data/lib/did_you_mean/spell_checkers/name_error_checkers/variable_name_checker.rb +1 -1
data/lib/did_you_mean/tree_spell_checker.rb +137 -0
data/lib/did_you_mean/verbose.rb +2 -2
data/lib/did_you_mean/version.rb +1 -1
data/test/core_ext/test_name_error_extension.rb +48 -0
data/test/edit_distance/{jaro_winkler_test.rb → test_jaro_winkler.rb} +2 -2
data/test/fixtures/mini_dir.yml +15 -0
data/test/fixtures/rspec_dir.yml +112 -0
data/test/helper.rb +29 -0
data/test/spell_checking/{class_name_check_test.rb → test_class_name_check.rb} +12 -10
data/test/spell_checking/{key_name_check_test.rb → test_key_name_check.rb} +18 -8
data/test/spell_checking/{method_name_check_test.rb → test_method_name_check.rb} +17 -15
data/test/spell_checking/{uncorrectable_name_check_test.rb → test_uncorrectable_name_check.rb} +3 -3
data/test/spell_checking/{variable_name_check_test.rb → test_variable_name_check.rb} +18 -16
data/test/{spell_checker_test.rb → test_spell_checker.rb} +2 -2
data/test/test_tree_spell_checker.rb +173 -0
data/test/test_verbose_formatter.rb +21 -0
data/test/tree_spell/change_word.rb +61 -0
data/test/tree_spell/human_typo.rb +89 -0
data/test/tree_spell/test_change_word.rb +38 -0
data/test/tree_spell/test_explore.rb +128 -0
data/test/tree_spell/test_human_typo.rb +24 -0
metadata +47 -58
data/.travis.yml +0 -23
data/test/core_ext/name_error_extension_test.rb +0 -51
data/test/experimental/initializer_name_correction_test.rb +0 -15
data/test/experimental/method_name_checker_test.rb +0 -13
data/test/test_helper.rb +0 -13
data/test/verbose_formatter_test.rb +0 -22

data/Gemfile CHANGED

@@ -4,6 +4,7 @@ source 'https://rubygems.org'
 gemspec
 gem 'benchmark-ips'
-gem 'benchmark-driver'
+gem 'benchmark_driver'
 gem 'memory_profiler'
 gem 'jaro_winkler', '>= 1.4.0'
+gem 'test-unit'

data/README.md CHANGED

@@ -1,4 +1,4 @@
-# did_you_mean [![Gem Version](https://badge.fury.io/rb/did_you_mean.svg)](https://rubygems.org/gems/did_you_mean) [![Build Status](https://travis-ci.org/yuki24/did_you_mean.svg?branch=master)](https://travis-ci.org/yuki24/did_you_mean)
+# did_you_mean [![Gem Version](https://badge.fury.io/rb/did_you_mean.svg)](https://rubygems.org/gems/did_you_mean) [![Build Status](https://travis-ci.org/ruby/did_you_mean.svg?branch=master)](https://travis-ci.org/ruby/did_you_mean)
 ## Installation
@@ -61,36 +61,6 @@ hash.fetch(:fooo)
 #    Did you mean?  :foo
 ```
-## Experimental Features
-Aside from the basic features above, the `did_you_mean` gem comes with experimental features. They can be enabled by calling `require 'did_you_mean/experimental'`.
-Note that **these experimental features should never be enabled in production as they would impact Ruby's performance and use some unstable Ruby APIs.**
-### Correcting an Instance Variable When It's Incorrectly Spelled
-```ruby
-require 'did_you_mean/experimental'
-@full_name = "Yuki Nishijima"
-@full_anme.split(" ")
-# => NoMethodError: undefined method `split' for nil:NilClass
-#    Did you mean?  @full_name
-```
-### Displaying a Warning When `initialize` is Incorrectly Spelled
-```ruby
-require 'did_you_mean/experimental'
-class Person
-  def intialize
-    ...
-  end
-end
-# => warning: intialize might be misspelled, perhaps you meant initialize?
-```
 ## Verbose Formatter
 This verbose formatter changes the error message format to take more lines/spaces so it'll be slightly easier to read the suggestions. This formatter can totally be used in any environment including production.
@@ -183,7 +153,7 @@ bundle exec benchmark-driver benchmark/speed.yml --rbenv '2.6.0 --jit;2.6.0;2.5.
 ## Contributing
-1. Fork it (http://github.com/yuki24/did_you_mean/fork)
+1. Fork it (https://github.com/ruby/did_you_mean/fork)
 2. Create your feature branch (`git checkout -b my-new-feature`)
 3. Commit your changes (`git commit -am 'Add some feature'`)
 4. Make sure all tests pass (`bundle exec rake`)

data/Rakefile CHANGED

@@ -4,20 +4,19 @@ require 'rake/testtask'
 Rake::TestTask.new do |task|
   task.libs << "test"
-  task.test_files = Dir['test/**/*_test.rb'].reject {|path| /(experimental)/ =~ path }
+  task.test_files = Dir['test/**/test_*.rb'].reject {|path| path.end_with?("test_explore.rb") }
   task.verbose    = true
   task.warning    = true
 end
-Rake::TestTask.new("test:experimental") do |task|
+Rake::TestTask.new("test:explore") do |task|
   task.libs << "test"
-  task.pattern = 'test/experimental/**/*_test.rb'
+  task.pattern = 'test/tree_spell/test_explore.rb'
   task.verbose = true
   task.warning = true
-  task.ruby_opts << "-rdid_you_mean/experimental"
 end
-task default: %i(test test:experimental)
+task default: %i(test)
 namespace :test do
   namespace :accuracy do

data/appveyor.yml ADDED

@@ -0,0 +1,25 @@
+install:
+  - set PATH=C:\Ruby26-x64\bin;%PATH%
+build: off
+branches:
+  only:
+    - master
+environment:
+  ruby_version: "24-%Platform%"
+  zlib_version: "1.2.11"
+  matrix:
+    - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
+    - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
+    - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
+before_test:
+  - ruby -v
+  - gem -v
+  - bundle -v
+  - gem uni did_you_mean
+test_script:
+  - rake

data/did_you_mean.gemspec CHANGED

@@ -1,7 +1,11 @@
 # coding: utf-8
 lib = File.expand_path('../lib', __FILE__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
-require 'did_you_mean/version'
+begin
+  require_relative "lib/did_you_mean/version"
+rescue LoadError # Fallback to load version file in ruby core repository
+  require_relative "version"
+end
 Gem::Specification.new do |spec|
   spec.name          = "did_you_mean"
@@ -10,7 +14,7 @@ Gem::Specification.new do |spec|
   spec.email         = ["mail@yukinishijima.net"]
   spec.summary       = '"Did you mean?" experience in Ruby'
   spec.description   = 'The gem that has been saving people from typos since 2014.'
-  spec.homepage      = "https://github.com/yuki24/did_you_mean"
+  spec.homepage      = "https://github.com/ruby/did_you_mean"
   spec.license       = "MIT"
   spec.files         = `git ls-files`.split($/).reject{|path| path.start_with?('evaluation/') }
@@ -19,7 +23,5 @@ Gem::Specification.new do |spec|
   spec.required_ruby_version = '>= 2.5.0'
-  spec.add_development_dependency "bundler"
   spec.add_development_dependency "rake"
-  spec.add_development_dependency "minitest"
 end

data/documentation/CHANGELOG.md.erb ADDED

@@ -0,0 +1,8 @@
+<% releases.each do |release| %>
+## [<%= release.name %>](https://github.com/<%= repository %>/tree/<%= release.tag_name %>)
+_<sup>released at <%= release.published_at %></sup>_
+<%= release.body.gsub(/\r\n/, "\n") %>
+<% end %>

data/documentation/changelog_generator.rb ADDED

@@ -0,0 +1,34 @@
+require 'octokit'
+require 'reverse_markdown'
+require 'erb'
+class ChangeLogGenerator
+  attr :repository, :template_path, :changelog_path
+  def initialize(repository, template_path: "CHANGELOG.md.erb", changelog_path: "CHANGELOG.md")
+    @repository     = repository
+    @template_path  = template_path
+    @changelog_path = changelog_path
+  end
+  def generate_and_save!
+    changelog_in_md   = ERB.new(template).result(binding)
+    changelog_in_html = Octokit.markdown(changelog_in_md, context: repository, mode: "gfm")
+    File.open(changelog_path, 'w') do |file|
+      file.write ReverseMarkdown.convert(changelog_in_html, github_flavored: true)
+    end
+  end
+  private
+  def template
+    open("#{__dir__}/#{template_path}").read
+  end
+  def releases
+    @releases ||= Octokit.releases(repository)
+  end
+end
+ChangeLogGenerator.new("ruby/did_you_mean").generate_and_save!

data/documentation/human_typo_api.md ADDED

@@ -0,0 +1,20 @@
+# HumanTypo API
+## Description
+Simulate an error-prone human typist.  Assumes typographical errors are Poisson distributed and
+each error is either a deletion, insertion, substitution, or transposition.
+## Initialization
+```
+def initialize(input, lambda: 0.05)
+end
+```
+where
+### input: A string with the word to be corrupted.
+### lambda: Error rate of the Poisson process
+The default of 0.05 corresponds to one error every 20 characters, and is thought to approximate the average, competent typist
+## Methods
+```
+def call
+end
+```
+Returns a word with typographical errors.

data/documentation/tree_spell_algorithm.md ADDED

@@ -0,0 +1,82 @@
+# TreeSpellChecker Algorithm
+## Overview
+The algorithm is designed to work on a dictionary that has a rooted tree structure.
+The algorithm treats the problem as a hidden state system, which tries to identify the true state of the input. Due to typographical errors, the state of the input is a hidden version of the true system state.  Each word in the dictionary is mapped to a multi-dimensional state, with the first dimension being the root, the second dimension being the next branch, and so on.  Each dimension is discrete with a finite number of elements.  The first dimension corresponds to the root, so it has only one element.
+The algorithm assumes the input state has the correct structure, and so generates the state of the input.  It starts with the root of the input word and maps it to the root element. It then looks at the value of the second dimension of the input word and chooses the closest elements of the possible second dimension elements.  It then continues to the third and higher dimensions. It terminates when it has worked out possible elements corresponding to the highest dimension of the input word.  At this point it has the possible elements for each dimension of the input word. It then generates all possible legitimate states from these elements.  Finally, it compares the possible leaves at the end of these legitimate states with the leaf of the input word. From this process it produces suggested states.
+## Accuracy
+The accuracy of the algorithm was tested using the HumanTypo class. It simulates a human typist by assuming that errors are Poisson distributed at a rate of one typo per 20 characters. Typos can be either a deletion, an insertion, substitution or a transposition.
+I ran 10,000 repetitions on both the `test` directory of the ```did_you_mean``` gem and on the ```spec``` directory of the ```rspec-core``` gem.
+The results were as follows:
+```
+                               Minitest Summary
+--------------------------------------------------------------------------------
+ Method  |   First Time (%)    Mean Suggestions       Failures (%)
+--------------------------------------------------------------------------------
+ Tree                98.0                1.1                 2.0
+ Standard            98.1                2.2                 1.6
+ Augmented           100.0                1.1                 0.0
+```
+and
+```
+                               Rspec Summary
+--------------------------------------------------------------------------------
+ Method  |   First Time (%)    Mean Suggestions       Failures (%)
+--------------------------------------------------------------------------------
+ Tree                94.7                1.0                 5.3
+ Standard            98.2                4.2                 1.1
+ Augmented           99.7                1.2                 0.2
+```
+As well, I checked the results on the ```test``` directory with ```HumanTypo``` generating errors at three times the rate:
+```
+                Minitest Summary  (lambda = 0.15)
+--------------------------------------------------------------------------------
+ Method  |   First Time (%)    Mean Suggestions       Failures (%)
+--------------------------------------------------------------------------------
+ Tree               88.9                1.0                 11.0
+ Standard           95.0                1.4                 4.3
+ Combined           99.0                1.0                 0.8
+```
+In all cases, the tree speller, when augmented by the standard spell checker, performed with higher accuracy while giving far fewer suggestions.
+## Execution Speed
+I tested the execution time on the ```test``` directory:
+```
+Testing execution time of Standard
+Average time (ms): 5.2
+Testing execution time of Tree
+Average time (ms): 1.1
+Testing execution time of Augmented Tree
+Average time (ms): 1.2
+```
+and on the ```spec``` directory
+```
+Testing execution time of Standard
+Average time (ms): 40.6
+Testing execution time of Tree
+Average time (ms): 2.7
+Testing execution time of Augmented Tree
+Average time (ms): 4.5
+```
+I was surprised by how much faster the tree checker was compared to the standard checker. I think the reason is that the predominant computational load will scale with O(log n) where n is the total number of words in the dictionary. My reasoning is that the algorithm very quickly prunes out states as it moves through the dimensions.
+## Augmentation option
+Given the major difference in speed between the standard and tree checker, and the likelihood that the disparity will grow rapidly with the size of the dictionary, I suspect that for some applications it will not be practicable to augment the tree checker by using the standard checker when the tree checker fails to find a suggestion.  Accordingly, I have added an option, ```:augment?```. The default is nil, but if true, then the standard checker is used if there are no suggestions.
+## Generation of Performance data
+This is done using ```test/tree_spell/test_explore.rb```. This is not a proper test file in that there are no assertions in it. It also takes over ten minutes to run; accordingly, I have disabled it by setting the constant TREE_SPELL_EXPLORE to false at the top of the file.  To run the file, set TREE_SPELL_EXPLORE to true.  It is also possible to run quick assessments by using a smaller value of n_repeat in the various tests.
+## Future Work
+I have identified two categories of remaining errors. The first class is when one of the elements is corrupted to being very small. Then the standard checker does not suggest the correct element, e.g. if an element is ```core``` and it is reduced to ```co```, the standard speller will not make a suggestion.  The second class of error is when the structure of the word is broken because one of the separators has been removed.  I think it might be possible to remove the first type of error and dramatically reduce the second type of error in a future version. This would be done as follows:
+- At each level, choose the dictionary element with the smallest distance.
+- This will work well unless the structure is broken, in which case it could return a wildly wrong suggestion.
+- To guard against this, the suggestion could be checked against the input word using the standard checker. If the standard checker rejects the suggestion, then it is assumed the structure is broken.
+- A large proportion of the time, a broken structure will be just due to one separator missing, and the order of the elements will not be affected. Accordingly, the structure can be fixed by comparing the input elements with a concatenation of two levels of the dictionary elements.  It would be possible to use the same idea to fix more than one separator missing, but this could quickly become computationally expensive.

data/documentation/tree_spell_checker_api.md ADDED

@@ -0,0 +1,24 @@
+# TreeSpellChecker API
+## Description
+## Initialization
+```
+def initialize(dictionary:, separator: '/', augment: nil)
+end
+```
+where
+#### dictionary: The dictionary is a list of possible words
+    * that are used to correct a misspelling
+    * The dictionary must be tree structured with a single character separator
+    * e.g 'spec/models/goals_spec_rb'.
+#### separator: A single character.  Cannot be alphabetical, '@' or '.'.
+#### augment: When set to true, the checker will use the standard ```SpellChecker``` to find possible suggestions.
+## Methods
+```
+def correct(input)
+end
+```
+where
+#### input: The input word to be corrected.

data/lib/did_you_mean.rb CHANGED

@@ -1,13 +1,13 @@
-require "did_you_mean/version"
-require "did_you_mean/core_ext/name_error"
+require_relative "did_you_mean/version"
+require_relative "did_you_mean/core_ext/name_error"
-require "did_you_mean/spell_checker"
-require 'did_you_mean/spell_checkers/name_error_checkers'
-require 'did_you_mean/spell_checkers/method_name_checker'
-require 'did_you_mean/spell_checkers/key_error_checker'
-require 'did_you_mean/spell_checkers/null_checker'
-require "did_you_mean/formatters/plain_formatter"
+require_relative "did_you_mean/spell_checker"
+require_relative 'did_you_mean/spell_checkers/name_error_checkers'
+require_relative 'did_you_mean/spell_checkers/method_name_checker'
+require_relative 'did_you_mean/spell_checkers/key_error_checker'
+require_relative 'did_you_mean/spell_checkers/null_checker'
+require_relative 'did_you_mean/formatters/plain_formatter'
+require_relative 'did_you_mean/tree_spell_checker'
 # The +DidYouMean+ gem adds functionality to suggest possible method/class
 # names upon errors such as +NameError+ and +NoMethodError+. In Ruby 2.3 or
@@ -86,14 +86,15 @@ module DidYouMean
   # Map of error types and spell checker objects.
   SPELL_CHECKERS = Hash.new(NullChecker)
-  SPELL_CHECKERS.merge!({
-    "NameError"     => NameErrorCheckers,
-    "NoMethodError" => MethodNameChecker,
-    "KeyError"      => KeyErrorChecker
-  })
+  # Adds +DidYouMean+ functionality to an error using a given spell checker
+  def self.correct_error(error_class, spell_checker)
+    SPELL_CHECKERS[error_class.name] = spell_checker
+    error_class.prepend(Correctable) unless error_class < Correctable
+  end
-  NameError.prepend DidYouMean::Correctable
-  KeyError.prepend DidYouMean::Correctable
+  correct_error NameError, NameErrorCheckers
+  correct_error KeyError, KeyErrorChecker
+  correct_error NoMethodError, MethodNameChecker
  # Returns the currently set formatter. By default, it is set to +DidYouMean::Formatter+.
   def self.formatter

data/lib/did_you_mean/experimental.rb CHANGED

@@ -1,2 +1,2 @@
-require 'did_you_mean/experimental/initializer_name_correction'
-require 'did_you_mean/experimental/ivar_name_correction'
+warn "Experimental features in the did_you_mean gem has been removed " \
+     "and `require \"did_you_mean/experimental\"' has no effect."

data/lib/did_you_mean/experimental/initializer_name_correction.rb CHANGED

@@ -1,6 +1,6 @@
 # frozen-string-literal: true
-require 'did_you_mean/levenshtein'
+require_relative '../levenshtein'
 module DidYouMean
   module Experimental

data/lib/did_you_mean/experimental/ivar_name_correction.rb CHANGED

@@ -1,6 +1,6 @@
 # frozen-string-literal: true
-require 'did_you_mean'
+require_relative '../../did_you_mean'
 module DidYouMean
   module Experimental #:nodoc:
@@ -36,6 +36,8 @@ module DidYouMean
         @location   = no_method_error.backtrace_locations.first
         @ivar_names = no_method_error.frame_binding.receiver.instance_variables
+        no_method_error.remove_instance_variable(:@frame_binding)
       end
       def corrections

data/lib/did_you_mean/levenshtein.rb CHANGED

@@ -41,7 +41,7 @@ module DidYouMean
     # detects the minimum value out of three arguments. This method is
     # faster than `[a, b, c].min` and puts less GC pressure.
-    # See https://github.com/yuki24/did_you_mean/pull/1 for a performance
+    # See https://github.com/ruby/did_you_mean/pull/1 for a performance
     # benchmark.
     def min3(a, b, c)
       if a < b && a < c

data/lib/did_you_mean/spell_checker.rb CHANGED

@@ -1,11 +1,11 @@
 # frozen-string-literal: true
-require "did_you_mean/levenshtein"
-require "did_you_mean/jaro_winkler"
+require_relative "levenshtein"
+require_relative "jaro_winkler"
 module DidYouMean
   class SpellChecker
-    def initialize(dictionary: )
+    def initialize(dictionary:)
       @dictionary = dictionary
     end
@@ -13,14 +13,14 @@ module DidYouMean
       input     = normalize(input)
       threshold = input.length > 3 ? 0.834 : 0.77
-      words = @dictionary.select {|word| JaroWinkler.distance(normalize(word), input) >= threshold }
-      words.reject! {|word| input == word.to_s }
-      words.sort_by! {|word| JaroWinkler.distance(word.to_s, input) }
+      words = @dictionary.select { |word| JaroWinkler.distance(normalize(word), input) >= threshold }
+      words.reject! { |word| input == word.to_s }
+      words.sort_by! { |word| JaroWinkler.distance(word.to_s, input) }
       words.reverse!
       # Correct mistypes
       threshold   = (input.length * 0.25).ceil
-      corrections = words.select {|c| Levenshtein.distance(normalize(c), input) <= threshold }
+      corrections = words.select { |c| Levenshtein.distance(normalize(c), input) <= threshold }
       # Correct misspells
       if corrections.empty?

data/lib/did_you_mean/spell_checkers/key_error_checker.rb CHANGED

@@ -1,4 +1,4 @@
-require "did_you_mean/spell_checker"
+require_relative "../spell_checker"
 module DidYouMean
   class KeyErrorChecker
@@ -8,7 +8,13 @@ module DidYouMean
     end
     def corrections
-      @corrections ||= SpellChecker.new(dictionary: @keys).correct(@key).map(&:inspect)
+      @corrections ||= exact_matches.empty? ? SpellChecker.new(dictionary: @keys).correct(@key).map(&:inspect) : exact_matches
+    end
+    private
+    def exact_matches
+      @exact_matches ||= @keys.select { |word| @key == word.to_s }.map(&:inspect)
     end
   end
 end