forspell 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
module Forspell
  # Enumerable list of file paths to spell-check: every path expanded from
  # +paths+ minus every path expanded from +exclude_paths+.
  class FileList
    include Enumerable

    # Raised when a given path is neither a directory nor an existing file.
    # The exception message is the offending path.
    class PathLoadError < StandardError; end

    # File extensions (without the dot) collected when a directory is expanded.
    EXTENSION_GLOBS = %w[
      rb
      c
      cpp
      cxx
      md
    ].freeze

    # @param paths [Array<String>] files and/or directories to include
    # @param exclude_paths [Array<String>] files and/or directories to leave out
    def initialize(paths:, exclude_paths:)
      @paths = paths
      @exclude_paths = exclude_paths
    end

    # Yields each included path that is not excluded, with every '//'
    # collapsed to '/'.
    #
    # @raise [PathLoadError] if any included or excluded path does not exist
    def each(&block)
      to_process = @paths.flat_map(&method(:expand_paths))
      to_exclude = @exclude_paths.flat_map(&method(:expand_paths))

      (to_process - to_exclude).map { |path| path.gsub('//', '/') }
                               .each(&block)
    end

    private

    # Expands a directory into the matching files below it (recursively);
    # returns an existing file path unchanged.
    def expand_paths(path)
      if File.directory?(path)
        Dir.glob(File.join(path, '**', "*.{#{EXTENSION_GLOBS.join(',')}}"))
      elsif File.exist?(path) # File.exists? was removed in Ruby 3.2
        path
      else
        raise PathLoadError, path
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'backports/2.4.0/hash'
4
+ require_relative '../sanitizer'
5
+ require_relative '../word_matcher'
6
+
7
module Forspell::Loaders
  # A single extracted word together with the file and line it came from.
  Word = Struct.new(:file, :line, :text)

  # Common behaviour for all loaders. Subclasses supply +extract_words+.
  class Base
    # @param file [String, nil] path the input is read from when +text+ is not given
    # @param text [String, nil] input to use directly instead of reading +file+
    def initialize(file: nil, text: nil)
      @file = file
      @input = text || input
      @words = []
      @errors = []
    end

    # Extracts words, sanitizes the text of each one in place and keeps only
    # those that the word matcher accepts and whose text is non-empty.
    #
    # @return [Array<Word>]
    # @raise [Forspell::Loaders::ParsingError] when extraction raises a YARD
    #   syntax error or a RuntimeError
    def read
      sanitized = extract_words.each do |word|
        word.text = Forspell::Sanitizer.sanitize(word.text)
      end

      sanitized.select do |word|
        content = word.text
        Forspell::WordMatcher.word?(content) && !(content.nil? || content.empty?)
      end
    rescue YARD::Parser::ParserSyntaxError, RuntimeError => e
      raise Forspell::Loaders::ParsingError, e.message
    end

    private

    # Default input source: the whole content of +@file+.
    def input
      File.read(@file)
    end

    # Must be implemented by subclasses.
    def extract_words
      raise NotImplementedError
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'source'
3
+
4
module Forspell::Loaders
  # Loader that collects comments from C/C++ sources using YARD's C parser.
  class C < Source
    # Returns the input from the parent loader, with invalid byte sequences
    # replaced by '?' when the string is not validly encoded.
    #
    # The previous implementation called the non-mutating String#encode and
    # discarded its result, so invalid bytes were never actually replaced.
    #
    # @return [String]
    def input
      res = super
      res.valid_encoding? ? res : res.scrub('?')
    end

    private

    # All comment nodes found by the YARD C parser in the input.
    def comments
      YARD::Parser::C::CParser.new(@input, @file).parse
                              .grep(YARD::Parser::C::Comment)
    end

    # Text of a comment node.
    def text(comment)
      comment.source
    end

    # Line reported by the comment node.
    def line(comment)
      comment.line
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'kramdown'
4
+ require 'kramdown-parser-gfm'
5
+
6
+ require_relative './base'
7
+
8
module Forspell::Loaders
  # Loader that extracts words from Markdown text parsed by Kramdown.
  class Markdown < Base
    # Converts a Kramdown element tree into nested hashes, dropping childless
    # elements whose type is not in PERMITTED_TYPES.
    class FilteredHash
      PERMITTED_TYPES = %i[
        text
        smart_quote
      ].freeze

      # @param el [Object] element responding to type, children, attr, value
      #   and options (as Kramdown elements do)
      # @param options [Hash] passed through unchanged to child conversions
      # @return [Hash, nil] nil for a childless element of a non-permitted type
      def convert(el, options)
        kids = el.children
        return if kids.empty? && !PERMITTED_TYPES.include?(el.type)

        node = { type: el.type }
        node[:attr] = el.attr unless el.attr.empty?
        node[:value] = el.value unless el.value.nil?
        node[:location] = el.options[:location]
        # Children that were filtered out stay in the list as nil entries;
        # extract_chunks skips them with grep(Hash).
        node[:children] = kids.map { |kid| convert(kid, options) } unless kids.empty?
        node
      end
    end

    PARSER = 'GFM'
    SPECIAL_CHARS_MAP = {
      lsquo: "'",
      rsquo: "'",
      ldquo: '"',
      rdquo: '"'
    }.freeze

    # Parses the input and returns one Word per token, tagged with @file and
    # the location of the element it came from (0 when there is none).
    #
    # @return [Array<Word>]
    # @raise [Forspell::Loaders::ParsingError] when a RuntimeError is raised
    #   while parsing or extracting
    def extract_words
      document = Kramdown::Document.new(@input, input: PARSER)
      tree = FilteredHash.new.convert(document.root, document.options)
      # convert returns nil when the root itself has no children; without
      # this guard extract_chunks would fail on nil[:children].
      return [] if tree.nil?

      chunks = extract_chunks(tree)
      return [] if chunks.empty?

      chunks.group_by { |chunk| chunk[:location] }.flat_map do |location, pieces|
        # Smart-quote symbols are mapped back to plain quote characters
        # before the pieces sharing a location are glued together.
        joined = pieces.map { |piece| SPECIAL_CHARS_MAP[piece[:value]] || piece[:value] }.join
        joined.split(/\s|,|;|—/)
              .reject(&:empty?)
              .map { |token| Word.new(@file, location || 0, token) }
      end
    rescue RuntimeError => e
      raise Forspell::Loaders::ParsingError, e.message
    end

    private

    # Flattens the converted tree into its leaf nodes, keeping only each
    # leaf's location and value.
    def extract_chunks(tree)
      tree[:children].grep(Hash).flat_map do |child|
        if child[:children]
          extract_chunks(child)
        else
          {
            location: child[:location],
            value: child[:value]
          }
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yard'
4
+ require 'yard/parser/ruby/ruby_parser'
5
+ require_relative 'source'
6
+
7
module Forspell::Loaders
  # Loader that collects comment tokens from Ruby sources via YARD's parser.
  class Ruby < Source
    private

    # Comment tokens of the parsed input.
    # Token example: [:comment, "# def loader_class path\n", [85, 2356]]
    def comments
      parser = YARD::Parser::Ruby::RubyParser.new(@input, @file)
      parser.parse.tokens.select { |token| token.first == :comment }
    end

    # Second element of the token: the comment text.
    def text(comment)
      comment[1]
    end

    # First entry of the token's last element.
    def line(comment)
      position = comment.last
      position.first
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yard'
4
+ require_relative 'base'
5
+ require_relative 'markdown'
6
+
7
module Forspell
  module Loaders
    # Base for source-code loaders. Subclasses provide +comments+,
    # +text(comment)+ and +line(comment)+; each comment's text is run through
    # the Markdown loader.
    class Source < Base
      private

      # Words found in all comments, re-tagged with this loader's file and
      # with line numbers shifted by the comment's own line.
      def extract_words
        comments.flat_map do |comment|
          found = Markdown.new(text: text(comment)).read
          found.each do |word|
            word.file = @file
            word.line += line(comment) - 1
          end
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'loaders/markdown'
4
+ require_relative 'loaders/ruby'
5
+ require_relative 'loaders/c'
6
+
7
module Forspell
  module Loaders
    # Raised when a file cannot be parsed or has no matching loader.
    class ParsingError < StandardError; end

    # Maps a file extension (including the dot) to the loader class handling it.
    EXT_TO_PARSER_CLASS = {
      '.rb' => Loaders::Ruby,
      '.c' => Loaders::C,
      '.cpp' => Loaders::C,
      '.cxx' => Loaders::C,
      '.md' => Loaders::Markdown
    }.freeze

    # Builds the loader responsible for +path+, chosen by file extension.
    #
    # @param path [String] path of the file to load
    # @return [Loaders::Base] loader instance created with +file: path+
    # @raise [ParsingError] when the extension has no registered loader
    #   (previously this surfaced as a NoMethodError on nil)
    def self.for(path)
      extension = File.extname(path)
      loader_class = EXT_TO_PARSER_CLASS.fetch(extension) do
        raise ParsingError, "Unsupported file extension #{extension.inspect} for #{path}"
      end
      loader_class.new(file: path)
    end
  end
end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fileutils'
4
+ require 'pastel'
5
+ require 'logger'
6
+ require 'json'
7
+
8
module Forspell
  # Collects processed files and spelling errors and prints them in the
  # requested format ('readable', 'dictionary', 'json' or 'yaml').
  class Reporter
    SUCCESS_CODE = 0
    ERROR_CODE = 1
    ERROR_FORMAT = '%<file>s:%<line>i: %<text>s (suggestions: %<suggestions>s)'
    SUMMARY = "Forspell inspects *.rb, *.c, *.cpp, *.md files\n"\
      '%<files>i inspected, %<errors>s detected'

    # @param logfile [String, Object, nil] log destination handed to Logger;
    #   a String path is touched first, nil falls back to STDERR
    # @param verbose [Boolean] log at INFO level when truthy, WARN otherwise
    # @param format [String] output format name
    def initialize(logfile:,
                   verbose:,
                   format:)

      FileUtils.touch(logfile) if logfile.is_a?(String)
      @logger = Logger.new(logfile || STDERR)
      @logger.level = verbose ? Logger::INFO : Logger::WARN
      # Log lines consist of the bare message only.
      @logger.formatter = proc { |*, msg| "#{msg}\n" }
      @format = format

      @pastel = Pastel.new(enabled: $stdout.tty?)
      @errors = []
      @files = []
    end

    # Records that +path+ is being processed.
    def file(path)
      @logger.info "Processing #{path}"
      @files << path
    end

    # Records a misspelled word; in 'readable' format it is printed at once.
    def error(word, suggestions)
      @errors << [word, suggestions]
      puts readable(word, suggestions) if @format == 'readable'
    end

    # Logs a parsing failure for the most recently recorded file.
    def parsing_error(error)
      @logger.error "Parsing error in #{@files.last}: #{error}"
    end

    # Logs a path that could not be found.
    def path_load_error(path)
      @logger.error "Path not found: #{path}"
    end

    # Prints the final report for the configured format; unknown formats
    # print nothing.
    def report
      case @format
      when 'readable'
        print_summary
      when 'dictionary'
        print_dictionary
      when 'json', 'yaml'
        print_formatted
      end
    end

    # @return [Integer] SUCCESS_CODE when no errors were recorded, else ERROR_CODE
    def finalize
      @errors.empty? ? SUCCESS_CODE : ERROR_CODE
    end

    private

    # One human-readable line for a misspelled word.
    def readable(word, suggestions)
      format(ERROR_FORMAT,
             file: word[:file],
             line: word[:line],
             text: @pastel.red(word[:text]),
             suggestions: suggestions.join(', '))
    end

    # Prints all errors serialized with to_json / to_yaml.
    def print_formatted
      # to_yaml only exists once the yaml library is loaded; load it on
      # demand so the 'yaml' format does not depend on another file having
      # required it.
      require 'yaml' if @format == 'yaml'

      @errors.map { |word, suggestions| word.to_h.merge(suggestions: suggestions) }
             .public_send("to_#{@format}")
             .tap { |res| puts res }
    end

    # Prints the number of inspected files and detected errors.
    def print_summary
      err_count = @errors.size
      color = err_count.positive? ? :red : :green
      total_errors_colorized = @pastel.decorate(err_count.to_s, color)

      puts format(SUMMARY, files: @files.size, errors: total_errors_colorized)
    end

    # Prints each distinct misspelled word, sorted case-insensitively and
    # preceded by a "# file" line for every file it occurred in.
    def print_dictionary
      @errors.map(&:first)
             .group_by(&:text)
             .transform_values { |v| v.map(&:file).uniq }
             .sort_by { |word, *| word.downcase }
             .each do |text, files|
        files.each { |file| puts "\# #{file}" }
        puts @pastel.decorate(text, :red)
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ Gemfile: example
2
+ Rakefile
3
+ accessor: example
4
+ admin: example
5
+ args
6
+ async
7
+ attr: example
8
+ backend: example
9
+ backport: port
10
+ backtrace: example
11
+ bitwise
12
+ boolean: example
13
+ builtin
14
+ bundler
15
+ charset: example
16
+ codepoint: example
17
+ composable
18
+ config
19
+ dataset: set
20
+ deserialization
21
+ deserialize: rise
22
+ dir
23
+ encoding: example
24
+ enum
25
+ fallback: example
26
+ filesystem: system
27
+ formatter: example
28
+ geospatial
29
+ i18n
30
+ initializer: example
31
+ inline
32
+ io
33
+ lexer: example
34
+ lib: rib
35
+ lifecycle: example
36
+ memoization
37
+ memoized
38
+ metadata
39
+ middleware: example
40
+ mixin: toxin
41
+ monkeypatch: patch
42
+ monkeypatching
43
+ multithreaded
44
+ multithreading
45
+ mutex: class
46
+ namespace: example
47
+ override: ride
48
+ param: example
49
+ parens
50
+ parser: example
51
+ plugin: penguin
52
+ pre
53
+ prepend: append
54
+ proc
55
+ refactor
56
+ refactoring
57
+ regex
58
+ regexp
59
+ repo
60
+ rubygems
61
+ runtime: example
62
+ stderr
63
+ stdin
64
+ stdlib
65
+ stdout
66
+ stylesheet: sheet
67
+ subclass: class
68
+ subclassing
69
+ subtype: example
70
+ superclass: class
71
+ timestamp: stamp
72
+ tokenizer: example
73
+ truthy
74
+ unescape
75
+ unicode
76
+ username: example
77
+ whitespace: example
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'loaders'
3
+
4
module Forspell
  # Runs the spell check over a list of files and feeds the results to a
  # reporter.
  class Runner
    # @param files [#each] paths to check
    # @param speller [#correct?, #suggest] spell checker
    # @param reporter [#file, #error, #parsing_error, #report] result sink
    def initialize(files:, speller:, reporter:)
      @files = files
      @speller = speller
      @reporter = reporter
    end

    # Processes every file, then asks the reporter to print its report.
    #
    # @return [Runner] self
    def call
      @files.each { |path| process_file(path) }

      @reporter.report

      self
    end

    private

    # Loads the words of one file and reports each misspelled one together
    # with its suggestions. A parsing failure is reported and the file is
    # skipped.
    def process_file(path)
      @reporter.file(path)

      words = Loaders.for(path).read
      words.reject { |word| @speller.correct?(word.text) }
           .each { |word| @reporter.error(word, @speller.suggest(word.text)) }
    rescue Forspell::Loaders::ParsingError => e
      @reporter.parsing_error(e)
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sanitize'
4
+ require 'cgi'
5
+
6
module Forspell
  # Cleans a raw token before it is matched and spell-checked.
  module Sanitizer
    # A trailing punctuation character other than dash, apostrophe,
    # underscore or dot.
    REMOVE_PUNCT = /[[:punct:]&&[^\-\'\_\.]]$/.freeze

    # Strips markup via Sanitize, unescapes HTML entities, then removes one
    # trailing punctuation character (REMOVE_PUNCT) followed by one trailing
    # '!', '.' or '?'.
    #
    # @param input [String]
    # @return [String]
    def self.sanitize(input)
      without_markup = Sanitize.fragment(input, elements: [], remove_contents: true)
      unescaped = CGI.unescapeHTML(without_markup)
      unescaped.gsub(REMOVE_PUNCT, '')
               .gsub(/[\!\.\?]{1}$/, '')
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ffi/hunspell'
4
+
5
module Forspell
  # Thin wrapper around a Hunspell dictionary extended with the bundled Ruby
  # word list and any custom dictionaries.
  class Speller
    attr_reader :dictionary

    # Maximum number of suggestions returned by #suggest.
    SUGGESTIONS_SIZE = 3
    # Frozen so that no instance can grow the shared list.
    HUNSPELL_DIRS = [File.join(__dir__, 'dictionaries')].freeze
    RUBY_DICT = File.join(__dir__, 'ruby.dict')

    # Loads the main dictionary and adds every word from RUBY_DICT and the
    # custom dictionaries to it.
    #
    # Dictionary files hold one entry per line, either "word" or
    # "word: example"; text from '#' to the end of a line is ignored.
    #
    # On ArgumentError a message is printed and the process exits with
    # status 2.
    #
    # @param main_dictionary [String] path of the main dictionary; its
    #   directory is added to the Hunspell search path and its basename is
    #   the dictionary name
    # @param custom_dictionaries [Array<String>] paths of extra word lists
    def initialize(main_dictionary, *custom_dictionaries)
      # Build a fresh array: the previous `HUNSPELL_DIRS << dir` appended to
      # the constant itself on every instantiation.
      FFI::Hunspell.directories = HUNSPELL_DIRS + [File.dirname(main_dictionary)]
      @dictionary = FFI::Hunspell.dict(File.basename(main_dictionary))

      [RUBY_DICT, *custom_dictionaries].flat_map { |path| File.read(path).split("\n") }
                                       .map { |line| line.gsub(/\s*\#.*$/, '') }
                                       .reject(&:empty?)
                                       .map { |line| line.split(/\s*:\s*/, 2) }
                                       .each do |word, example|
        example ? @dictionary.add_with_affix(word, example) : @dictionary.add(word)
      end
    rescue ArgumentError
      puts "Unable to find dictionary #{main_dictionary}"
      exit(2)
    end

    # @return [Boolean] result of the dictionary check for +word+
    def correct?(word)
      dictionary.check?(word)
    end

    # @return [Array<String>] at most SUGGESTIONS_SIZE suggestions for +word+
    def suggest(word)
      dictionary.suggest(word).first(SUGGESTIONS_SIZE)
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require 'backports/2.4.0/regexp/match'
2
+
3
module Forspell
  # Decides whether a token looks like a natural-language word that is worth
  # spell-checking.
  module WordMatcher
    # Anchored with \A and \z so the whole string must match; ^ and $ only
    # anchor to line boundaries and would accept multi-line strings.
    WORD = %r{\A
      \'?                  # could start with apostrophe
      ([a-z]|[A-Z])?       # optional leading ASCII letter of either case,
      ([[:lower:]])+       # then one or more lowercase letters,
      ([\'\-])?            # optional dash/apostrophe,
      ([[:lower:]])*       # another bunch of lowercase letters
      \'?                  # could end with apostrophe
    \z}x.freeze

    # @param text [String] candidate token
    # @return [Boolean] true when the entire +text+ matches WORD
    def self.word?(text)
      WORD.match?(text)
    end
  end
end
data/lib/forspell.rb ADDED
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the gem.
module Forspell
end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: forspell
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Kirill Kuprikov
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-04-01 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Forspell is a spellchecker for code and documentation. It uses the well-known
14
+ hunspell tool and dictionary, provides customizable output, and can be easily
15
+ integrated into a CI pipeline.
16
+ email: kkuprikov@gmail.com
17
+ executables:
18
+ - forspell
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - README.md
23
+ - exe/clone_repos.sh
24
+ - exe/create_dictionary
25
+ - exe/forspell
26
+ - exe/generate_logs
27
+ - lib/forspell.rb
28
+ - lib/forspell/cli.rb
29
+ - lib/forspell/dictionaries/en_US.aff
30
+ - lib/forspell/dictionaries/en_US.dic
31
+ - lib/forspell/file_list.rb
32
+ - lib/forspell/loaders.rb
33
+ - lib/forspell/loaders/base.rb
34
+ - lib/forspell/loaders/c.rb
35
+ - lib/forspell/loaders/markdown.rb
36
+ - lib/forspell/loaders/ruby.rb
37
+ - lib/forspell/loaders/source.rb
38
+ - lib/forspell/reporter.rb
39
+ - lib/forspell/ruby.dict
40
+ - lib/forspell/runner.rb
41
+ - lib/forspell/sanitizer.rb
42
+ - lib/forspell/speller.rb
43
+ - lib/forspell/word_matcher.rb
44
+ homepage: http://github.com/kkuprikov/forspell
45
+ licenses:
46
+ - MIT
47
+ metadata: {}
48
+ post_install_message:
49
+ rdoc_options: []
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ requirements: []
63
+ rubyforge_project:
64
+ rubygems_version: 2.7.8
65
+ signing_key:
66
+ specification_version: 4
67
+ summary: For spelling check
68
+ test_files: []