RubyGems - forspell - Versions diffs - 0.0.2 - Mend

forspell 0.0.2

Files changed (24) hide show

checksums.yaml +7 -0
data/README.md +88 -0
data/exe/clone_repos.sh +72 -0
data/exe/create_dictionary +47 -0
data/exe/forspell +4 -0
data/exe/generate_logs +8 -0
data/lib/forspell/cli.rb +91 -0
data/lib/forspell/dictionaries/en_US.aff +205 -0
data/lib/forspell/dictionaries/en_US.dic +49271 -0
data/lib/forspell/file_list.rb +42 -0
data/lib/forspell/loaders/base.rb +37 -0
data/lib/forspell/loaders/c.rb +27 -0
data/lib/forspell/loaders/markdown.rb +75 -0
data/lib/forspell/loaders/ruby.rb +25 -0
data/lib/forspell/loaders/source.rb +24 -0
data/lib/forspell/loaders.rb +23 -0
data/lib/forspell/reporter.rb +99 -0
data/lib/forspell/ruby.dict +77 -0
data/lib/forspell/runner.rb +35 -0
data/lib/forspell/sanitizer.rb +17 -0
data/lib/forspell/speller.rb +38 -0
data/lib/forspell/word_matcher.rb +18 -0
data/lib/forspell.rb +3 -0
metadata +68 -0

data/lib/forspell/file_list.rb ADDED Viewed

@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+module Forspell
+  class FileList
+    include Enumerable
+    class PathLoadError < StandardError; end
+    EXTENSION_GLOBS = %w[
+      rb
+      c
+      cpp
+      cxx
+      md
+    ].freeze
+    def initialize(paths:, exclude_paths:)
+      @paths = paths
+      @exclude_paths = exclude_paths
+    end
+    def each(&block)
+      to_process = @paths.flat_map(&method(:expand_paths))
+      to_exclude = @exclude_paths.flat_map(&method(:expand_paths))
+      (to_process - to_exclude).map{ |path| path.gsub('//', '/')}
+        .each(&block)
+    end
+    private
+    def expand_paths(path)
+      if File.directory?(path)
+        Dir.glob(File.join(path, '**', "*.{#{EXTENSION_GLOBS.join(',')}}"))
+      elsif File.exists? path
+        path
+      else
+        raise PathLoadError, path
+      end
+    end
+  end
+end

data/lib/forspell/loaders/base.rb ADDED Viewed

@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+require 'backports/2.4.0/hash'
+require_relative '../sanitizer'
+require_relative '../word_matcher'
+module Forspell::Loaders
+  Word = Struct.new(:file, :line, :text)
+  class Base
+    def initialize(file: nil, text: nil)
+      @file = file
+      @input = text || input
+      @words = []
+      @errors = []
+    end
+    def read
+      extract_words.each { |word| word.text = Forspell::Sanitizer.sanitize(word.text) }
+                   .select{ |word| Forspell::WordMatcher.word?(word.text) }
+                   .reject { |w| w.text.nil? || w.text.empty? }
+    rescue YARD::Parser::ParserSyntaxError, RuntimeError => e
+      raise Forspell::Loaders::ParsingError, e.message
+    end
+    private
+    def input
+      File.read(@file)
+    end
+    def extract_words
+      raise NotImplementedError
+    end
+  end
+end

data/lib/forspell/loaders/c.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+require_relative 'source'
+module Forspell::Loaders
+  class C < Source
+    def input
+      res = super
+      res.encode('UTF-8', invalid: :replace, replace: '?') unless res.valid_encoding?
+      res
+    end
+    private
+    def comments
+      YARD::Parser::C::CParser.new(@input, @file).parse
+        .grep(YARD::Parser::C::Comment)
+    end
+    def text(comment)
+      comment.source
+    end
+    def line(comment)
+      comment.line
+    end
+  end
+end

data/lib/forspell/loaders/markdown.rb ADDED Viewed

@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+require 'kramdown'
+require 'kramdown-parser-gfm'
+require_relative './base'
+module Forspell::Loaders
+  class Markdown < Base
+    class FilteredHash
+      PERMITTED_TYPES = %i[
+        text
+        smart_quote
+      ].freeze
+      def convert(el, options)
+        return if !PERMITTED_TYPES.include?(el.type) && el.children.empty?
+        hash = { type: el.type }
+        hash[:attr] = el.attr unless el.attr.empty?
+        hash[:value] = el.value unless el.value.nil?
+        hash[:location] = el.options[:location]
+        unless el.children.empty?
+          hash[:children] = []
+          el.children.each { |child| hash[:children] << convert(child, options) }
+        end
+        hash
+      end
+    end
+    PARSER = 'GFM'
+    SPECIAL_CHARS_MAP = {
+      lsquo: "'",
+      rsquo: "'",
+      ldquo: '"',
+      rdquo: '"'
+    }.freeze
+    def extract_words
+      document = Kramdown::Document.new(@input, input: PARSER)
+      tree = FilteredHash.new.convert(document.root, document.options)
+      chunks = extract_chunks(tree)
+      result = []
+      return result if chunks.empty?
+      group_by_location = chunks.group_by { |res| res[:location] }
+                                .transform_values do |lines|
+        lines.map { |v| SPECIAL_CHARS_MAP[v[:value]] || v[:value] }.join.split(/\s|,|;|—/)
+      end
+      group_by_location.each_pair do |location, words|
+        words.reject(&:empty?)
+             .each { |word| result << Word.new(@file, location || 0, word) }
+      end
+      result
+    rescue RuntimeError => e
+      raise Forspell::Loaders::ParsingError, e.message
+    end
+    private
+    def extract_chunks(tree)
+      tree[:children].grep(Hash).flat_map do |child|
+        if child[:children]
+          extract_chunks(child)
+        else
+          {
+            location: child[:location],
+            value: child[:value]
+          }
+        end
+      end
+    end
+  end
+end

data/lib/forspell/loaders/ruby.rb ADDED Viewed

@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+require 'yard'
+require 'yard/parser/ruby/ruby_parser'
+require_relative 'source'
+module Forspell::Loaders
+  class Ruby < Source
+    private
+    def comments
+      YARD::Parser::Ruby::RubyParser.new(@input, @file).parse
+        .tokens.select{ |token| token.first == :comment }
+      # example: [:comment, "# def loader_class path\n", [85, 2356]]
+    end
+    def text(comment)
+      comment[1]
+    end
+    def line(comment)
+      comment.last.first
+    end
+  end
+end

data/lib/forspell/loaders/source.rb ADDED Viewed

@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+require 'yard'
+require_relative 'base'
+require_relative 'markdown'
+module Forspell
+  module Loaders
+    class Source < Base
+      private
+      def extract_words
+        comments.flat_map do |comment|
+          Markdown.new(text: text(comment)).read
+                  .map do |word|
+                    word.file = @file
+                    word.line += line(comment) - 1
+                    word
+                  end
+        end
+      end
+    end
+  end
+end

data/lib/forspell/loaders.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+require_relative 'loaders/markdown'
+require_relative 'loaders/ruby'
+require_relative 'loaders/c'
+module Forspell
+  module Loaders
+    class ParsingError < StandardError; end
+    EXT_TO_PARSER_CLASS = {
+      '.rb' => Loaders::Ruby,
+      '.c' => Loaders::C,
+      '.cpp' => Loaders::C,
+      '.cxx' => Loaders::C,
+      '.md' => Loaders::Markdown
+    }.freeze
+    def self.for(path)
+      EXT_TO_PARSER_CLASS[File.extname(path)].new(file: path)
+    end
+  end
+end

data/lib/forspell/reporter.rb ADDED Viewed

@@ -0,0 +1,99 @@
+# frozen_string_literal: true
+require 'fileutils'
+require 'json'
+require 'logger'
+require 'yaml'
+require 'pastel'
+module Forspell
+  # Collects spelling errors while files are processed, logs progress and
+  # problems, and prints the result in the requested format.
+  class Reporter
+    SUCCESS_CODE = 0
+    ERROR_CODE = 1
+    ERROR_FORMAT = '%<file>s:%<line>i: %<text>s (suggestions: %<suggestions>s)'
+    SUMMARY = "Forspell inspects *.rb, *.c, *.cpp, *.md files\n"\
+              '%<files>i inspected, %<errors>s detected'
+    # @param logfile [String, nil] log destination; a String path is touched
+    #   first, nil falls back to STDERR
+    # @param verbose [Boolean] log at INFO level when truthy, WARN otherwise
+    # @param format [String] 'readable', 'dictionary', 'json' or 'yaml'
+    def initialize(logfile:,
+                   verbose:,
+                   format:)
+      FileUtils.touch(logfile) if logfile.is_a?(String)
+      @logger = Logger.new(logfile || STDERR)
+      @logger.level = verbose ? Logger::INFO : Logger::WARN
+      # Log bare messages, without severity or timestamp.
+      @logger.formatter = proc { |*, msg| "#{msg}\n" }
+      @format = format
+      @pastel = Pastel.new(enabled: $stdout.tty?)
+      @errors = []
+      @files = []
+    end
+    # Records that +path+ is being processed.
+    def file(path)
+      @logger.info "Processing #{path}"
+      @files << path
+    end
+    # Records a misspelled word; in 'readable' format it is printed right away.
+    def error(word, suggestions)
+      @errors << [word, suggestions]
+      puts readable(word, suggestions) if @format == 'readable'
+    end
+    # Logs a parsing failure against the most recently recorded file.
+    def parsing_error(error)
+      @logger.error "Parsing error in #{@files.last}: #{error}"
+    end
+    # Logs a path that could not be found.
+    def path_load_error(path)
+      @logger.error "Path not found: #{path}"
+    end
+    # Prints the final output for the configured format; unknown formats
+    # print nothing.
+    def report
+      case @format
+      when 'readable'
+        print_summary
+      when 'dictionary'
+        print_dictionary
+      when 'json', 'yaml'
+        print_formatted
+      end
+    end
+    # @return [Integer] SUCCESS_CODE when no errors were recorded, ERROR_CODE otherwise
+    def finalize
+      @errors.empty? ? SUCCESS_CODE : ERROR_CODE
+    end
+    private
+    # One-line description of a misspelled word with its suggestions.
+    def readable(word, suggestions)
+      format(ERROR_FORMAT,
+             file: word[:file],
+             line: word[:line],
+             text: @pastel.red(word[:text]),
+             suggestions: suggestions.join(', '))
+    end
+    # Prints all errors as JSON or YAML and returns the serialized string.
+    def print_formatted
+      rows = @errors.map { |word, suggestions| word.to_h.merge(suggestions: suggestions) }
+      # Explicit dispatch rather than public_send("to_#{@format}"): the method
+      # name is no longer assembled from a string, and the yaml dependency is
+      # visible here instead of hidden behind the dynamic call.
+      output = @format == 'yaml' ? rows.to_yaml : rows.to_json
+      puts output
+      output
+    end
+    # Prints the number of inspected files and detected errors.
+    def print_summary
+      err_count = @errors.size
+      color = err_count.positive? ? :red : :green
+      total_errors_colorized = @pastel.decorate(err_count.to_s, color)
+      puts format(SUMMARY, files: @files.size, errors: total_errors_colorized)
+    end
+    # Prints each distinct misspelled word, sorted case-insensitively and
+    # preceded by comment lines naming the files it occurs in.
+    def print_dictionary
+      @errors.map(&:first)
+             .group_by(&:text)
+             .transform_values { |v| v.map(&:file).uniq }
+             .sort_by { |word, *| word.downcase }
+             .each do |text, files|
+        files.each { |file| puts "\# #{file}" }
+        puts @pastel.decorate(text, :red)
+      end
+    end
+  end
+end

data/lib/forspell/ruby.dict ADDED Viewed

@@ -0,0 +1,77 @@
+Gemfile: example
+Rakefile
+accessor: example
+admin: example
+args
+async
+attr: example
+backend: example
+backport: port
+backtrace: example
+bitwise
+boolean: example
+builtin
+bundler
+charset: example
+codepoint: example
+composable
+config
+dataset: set
+deserialization
+deserialize: rise
+dir
+encoding: example
+enum
+fallback: example
+filesystem: system
+formatter: example
+geospatial
+i18n
+initializer: example
+inline
+io
+lexer: example
+lib: rib
+lifecycle: example
+memoization
+memoized
+metadata
+middleware: example
+mixin: toxin
+monkeypatch: patch
+monkeypatching
+multithreaded
+multithreading
+mutex: class
+namespace: example
+override: ride
+param: example
+parens
+parser: example
+plugin: penguin
+pre
+prepend: append
+proc
+refactor
+refactoring
+regex
+regexp
+repo
+rubygems
+runtime: example
+stderr
+stdin
+stdlib
+stdout
+stylesheet: sheet
+subclass: class
+subclassing
+subtype: example
+superclass: class
+timestamp: stamp
+tokenizer: example
+truthy
+unescape
+unicode
+username: example
+whitespace: example

data/lib/forspell/runner.rb ADDED Viewed

@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+require_relative 'loaders'
+module Forspell
+  class Runner
+    def initialize(files:, speller:, reporter:)
+      @files = files
+      @speller = speller
+      @reporter = reporter
+    end
+    def call
+      @files.each do |path|
+        process_file path
+      end
+      @reporter.report
+      self
+    end
+    private
+    def process_file path
+      @reporter.file(path)
+      words = Loaders.for(path).read
+      words.reject { |word| @speller.correct?(word.text) }
+           .each { |word| @reporter.error(word, @speller.suggest(word.text)) }
+    rescue Forspell::Loaders::ParsingError => e
+      @reporter.parsing_error(e) and return
+    end
+  end
+end

data/lib/forspell/sanitizer.rb ADDED Viewed

@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+require 'sanitize'
+require 'cgi'
+module Forspell
+  module Sanitizer
+    REMOVE_PUNCT = /[[:punct:]&&[^\-\'\_\.]]$/.freeze
+    def self.sanitize(input)
+      CGI.unescapeHTML(Sanitize.fragment(input,
+                                         elements: [], remove_contents: true))
+         .gsub(REMOVE_PUNCT, '').gsub(/[\!\.\?]{1}$/, '')
+    end
+  end
+end

data/lib/forspell/speller.rb ADDED Viewed

@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+require 'ffi/hunspell'
+module Forspell
+  class Speller
+    attr_reader :dictionary
+    SUGGESTIONS_SIZE = 3
+    HUNSPELL_DIRS = [File.join(__dir__, 'dictionaries')]
+    RUBY_DICT = File.join(__dir__, 'ruby.dict')
+    def initialize(main_dictionary, *custom_dictionaries)
+      FFI::Hunspell.directories = HUNSPELL_DIRS << File.dirname(main_dictionary)
+      @dictionary = FFI::Hunspell.dict(File.basename(main_dictionary))
+      [RUBY_DICT, *custom_dictionaries].flat_map { |path| File.read(path).split("\n") }
+                                       .compact
+                                       .map { |line| line.gsub(/\s*\#.*$/, '') }
+                                       .reject(&:empty?)
+                                       .map { |line| line.split(/\s*:\s*/, 2) }
+                                       .each do |word, example|
+        example ? @dictionary.add_with_affix(word, example) : @dictionary.add(word)
+      end
+    rescue ArgumentError
+      puts "Unable to find dictionary #{main_dictionary}"
+      exit(2)
+    end
+    def correct?(word)
+      dictionary.check?(word)
+    end
+    def suggest(word)
+      dictionary.suggest(word).first(SUGGESTIONS_SIZE)
+    end
+  end
+end

data/lib/forspell/word_matcher.rb ADDED Viewed

@@ -0,0 +1,18 @@
+require 'backports/2.4.0/regexp/match'
+module Forspell
+  module WordMatcher
+    WORD = %r{^
+      \'?                # could start with apostrophe
+      ([a-z]|[A-Z])?     # at least one letter,
+      ([[:lower:]])+     # then any number of letters,
+      ([\'\-])?          # optional dash/apostrophe,
+      ([[:lower:]])*     # another bunch of letters
+      \'?                # could end with apostrophe
+    $}x
+    def self.word? text
+      WORD.match?(text)
+    end
+  end
+end

data/lib/forspell.rb ADDED Viewed

@@ -0,0 +1,3 @@
+# frozen_string_literal: true
+module Forspell; end

metadata ADDED Viewed

@@ -0,0 +1,68 @@
+--- !ruby/object:Gem::Specification
+name: forspell
+version: !ruby/object:Gem::Version
+  version: 0.0.2
+platform: ruby
+authors:
+- Kirill Kuprikov
+autorequire:
+bindir: exe
+cert_chain: []
+date: 2019-04-01 00:00:00.000000000 Z
+dependencies: []
+description: Forspell is a spellchecker for code and documentation. It uses the well-known
+  hunspell tool and dictionary, provides customizable output, and can be easily
+  integrated into a CI pipeline.
+email: kkuprikov@gmail.com
+executables:
+- forspell
+extensions: []
+extra_rdoc_files: []
+files:
+- README.md
+- exe/clone_repos.sh
+- exe/create_dictionary
+- exe/forspell
+- exe/generate_logs
+- lib/forspell.rb
+- lib/forspell/cli.rb
+- lib/forspell/dictionaries/en_US.aff
+- lib/forspell/dictionaries/en_US.dic
+- lib/forspell/file_list.rb
+- lib/forspell/loaders.rb
+- lib/forspell/loaders/base.rb
+- lib/forspell/loaders/c.rb
+- lib/forspell/loaders/markdown.rb
+- lib/forspell/loaders/ruby.rb
+- lib/forspell/loaders/source.rb
+- lib/forspell/reporter.rb
+- lib/forspell/ruby.dict
+- lib/forspell/runner.rb
+- lib/forspell/sanitizer.rb
+- lib/forspell/speller.rb
+- lib/forspell/word_matcher.rb
+homepage: http://github.com/kkuprikov/forspell
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.7.8
+signing_key:
+specification_version: 4
+summary: For spelling check
+test_files: []