httpspell 1.5.0 → 1.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.markdown +1 -1
- data/exe/httpspell +32 -12
- data/lib/http_spell/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 509242695286e955675a85e15957752f1ac19eba7a5ffda317f6e45fd41c6c01
|
4
|
+
data.tar.gz: 4537ecafb9c882a23024c00246b0c1a07359d5180b2ee052d68a25ea23a64f6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ddf6cb8856cf025e21956c49efe2d94c35204c273a086f60b6ae5e61c7bd56ec9fddda5ec8890f78c0ff106b03baba6ced6bfcf733f1e93622721ebf0b966a08
|
7
|
+
data.tar.gz: c217f2635966096b1ab86df7c52d6dd76145359d761b3cccc6829fde018760407e7207b7d6b89e3f709b8a966f82de93964656e64ad32474e4e8e3ad7cea43f8
|
data/Gemfile.lock
CHANGED
data/README.markdown
CHANGED
@@ -39,7 +39,7 @@ Words that are not in the dictionary for the given language (inferred from the `
|
|
39
39
|
|
40
40
|
# Misc
|
41
41
|
|
42
|
-
If you produce content with kramdown (e.g. using Jekyll),
|
42
|
+
If you produce content with kramdown (e.g. using Jekyll), an [Inline Attribute List](https://kramdown.gettalong.org/syntax.html#inline-attribute-lists) can be used to set `spellcheck='false'` for an element by adding this line *after* the element (e.g. heading):
|
43
43
|
|
44
44
|
```
|
45
45
|
{: spellcheck="false"}
|
data/exe/httpspell
CHANGED
@@ -7,6 +7,7 @@ require 'http_spell/spellchecker'
|
|
7
7
|
require 'http_spell/version'
|
8
8
|
|
9
9
|
personal_dictionary_path = nil
|
10
|
+
ignore_file_path = nil
|
10
11
|
force_language = nil
|
11
12
|
tracing = nil
|
12
13
|
verbose = nil
|
@@ -14,6 +15,7 @@ included = nil
|
|
14
15
|
excluded = []
|
15
16
|
|
16
17
|
begin
|
18
|
+
# rubocop:disable Metrics/BlockLength
|
17
19
|
OptionParser.new do |parser|
|
18
20
|
parser.banner.prepend <<~BANNER
|
19
21
|
Spellchecks a website via HTTP.
|
@@ -25,6 +27,10 @@ begin
|
|
25
27
|
personal_dictionary_path = p
|
26
28
|
end
|
27
29
|
|
30
|
+
parser.on('-I', '--ignore=FILE', 'path to a file containing spelling errors to ignore') do |i|
|
31
|
+
ignore_file_path = i
|
32
|
+
end
|
33
|
+
|
28
34
|
parser.on('-l', '--language=LANGUAGE', 'override LANGUAGE of content') do |l|
|
29
35
|
force_language = l
|
30
36
|
end
|
@@ -49,6 +55,7 @@ begin
|
|
49
55
|
# TODO: --recursive, defaults to false
|
50
56
|
# TODO wget has some additional options for recursive behavior that should be reviewed
|
51
57
|
end.parse!
|
58
|
+
# rubocop:enable Metrics/BlockLength
|
52
59
|
rescue StandardError
|
53
60
|
warn "Error: #{$ERROR_INFO}"
|
54
61
|
exit 1
|
@@ -59,18 +66,38 @@ if ARGV.size != 1
|
|
59
66
|
exit 1
|
60
67
|
end
|
61
68
|
|
62
|
-
|
63
|
-
|
69
|
+
# rubocop:disable Metrics/ParameterLists
|
70
|
+
def check(url, doc, lang, personal_dictionary_path, ignore_file_path, verbose)
|
71
|
+
has_unknown_words = false
|
72
|
+
|
73
|
+
# Handle elements with a different lang attribute separately
|
74
|
+
doc.css(%([lang]:not([lang="#{lang}"]))).each do |element|
|
75
|
+
has_unknown_words |= check("#{url} => #{element.name} with", element, element['lang'], personal_dictionary_path, ignore_file_path, verbose)
|
76
|
+
element.unlink
|
77
|
+
end
|
78
|
+
|
79
|
+
unknown_words = HttpSpell::SpellChecker.new(personal_dictionary_path, verbose:).check(doc.to_s, lang)
|
80
|
+
|
81
|
+
if ignore_file_path && unknown_words.any?
|
82
|
+
ignore_words = File.read(ignore_file_path).lines.map(&:chomp)
|
83
|
+
ignored_words = unknown_words.intersection(ignore_words)
|
84
|
+
|
85
|
+
if ignored_words.any?
|
86
|
+
warn "#{url} (lang=#{lang}): Ignoring the following spelling errors because they are in the ignore list: #{ignored_words}" if verbose
|
87
|
+
unknown_words -= ignore_words
|
88
|
+
end
|
89
|
+
end
|
64
90
|
|
65
91
|
if unknown_words.empty?
|
66
92
|
warn "#{url} (lang=#{lang}): No unknown words" if verbose
|
67
|
-
|
93
|
+
has_unknown_words # no unknown words in doc, but maybe in elements with a different language
|
68
94
|
else
|
69
95
|
warn "#{url} (lang=#{lang}): #{unknown_words.size} unknown words:" if verbose
|
70
96
|
puts unknown_words
|
71
|
-
true
|
97
|
+
true # regardless of what elements with a different language had, at least doc has unknown words
|
72
98
|
end
|
73
99
|
end
|
100
|
+
# rubocop:enable Metrics/ParameterLists
|
74
101
|
|
75
102
|
has_unknown_words = false
|
76
103
|
|
@@ -83,14 +110,7 @@ spider_success = HttpSpell::Spider.new(ARGV.first, included:, excluded:, verbose
|
|
83
110
|
doc.css('iframe').each(&:unlink)
|
84
111
|
doc.css('[spellcheck=false]').each(&:unlink)
|
85
112
|
|
86
|
-
#
|
87
|
-
doc.css(%([lang]:not([lang="#{lang}"]))).each do |element|
|
88
|
-
has_unknown_words |= check("#{url} => #{element.name} with", element.to_s, element['lang'], personal_dictionary_path, verbose)
|
89
|
-
element.unlink
|
90
|
-
end
|
91
|
-
|
92
|
-
# Everything else
|
93
|
-
has_unknown_words |= check("#{url} => document with", doc.to_s, lang, personal_dictionary_path, verbose)
|
113
|
+
has_unknown_words |= check("#{url} => document with", doc, lang, personal_dictionary_path, ignore_file_path, verbose)
|
94
114
|
end
|
95
115
|
|
96
116
|
exit 2 unless spider_success
|
data/lib/http_spell/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: httpspell
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.0
|
4
|
+
version: 1.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steffen Uhlig
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-06-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|