httpspell 1.5.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.markdown +1 -1
- data/exe/httpspell +32 -12
- data/lib/http_spell/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 509242695286e955675a85e15957752f1ac19eba7a5ffda317f6e45fd41c6c01
|
4
|
+
data.tar.gz: 4537ecafb9c882a23024c00246b0c1a07359d5180b2ee052d68a25ea23a64f6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ddf6cb8856cf025e21956c49efe2d94c35204c273a086f60b6ae5e61c7bd56ec9fddda5ec8890f78c0ff106b03baba6ced6bfcf733f1e93622721ebf0b966a08
|
7
|
+
data.tar.gz: c217f2635966096b1ab86df7c52d6dd76145359d761b3cccc6829fde018760407e7207b7d6b89e3f709b8a966f82de93964656e64ad32474e4e8e3ad7cea43f8
|
data/Gemfile.lock
CHANGED
data/README.markdown
CHANGED
@@ -39,7 +39,7 @@ Words that are not in the dictionary for the given language (inferred from the `
|
|
39
39
|
|
40
40
|
# Misc
|
41
41
|
|
42
|
-
If you produce content with kramdown (e.g. using Jekyll),
|
42
|
+
If you produce content with kramdown (e.g. using Jekyll), an [Inline Attribute List](https://kramdown.gettalong.org/syntax.html#inline-attribute-lists) can be used to set `spellcheck='false'` for an element by adding this line *after* the element (e.g. heading):
|
43
43
|
|
44
44
|
```
|
45
45
|
{: spellcheck="false"}
|
data/exe/httpspell
CHANGED
@@ -7,6 +7,7 @@ require 'http_spell/spellchecker'
|
|
7
7
|
require 'http_spell/version'
|
8
8
|
|
9
9
|
personal_dictionary_path = nil
|
10
|
+
ignore_file_path = nil
|
10
11
|
force_language = nil
|
11
12
|
tracing = nil
|
12
13
|
verbose = nil
|
@@ -14,6 +15,7 @@ included = nil
|
|
14
15
|
excluded = []
|
15
16
|
|
16
17
|
begin
|
18
|
+
# rubocop:disable Metrics/BlockLength
|
17
19
|
OptionParser.new do |parser|
|
18
20
|
parser.banner.prepend <<~BANNER
|
19
21
|
Spellchecks a website via HTTP.
|
@@ -25,6 +27,10 @@ begin
|
|
25
27
|
personal_dictionary_path = p
|
26
28
|
end
|
27
29
|
|
30
|
+
parser.on('-I', '--ignore=FILE', 'path to a file containing spelling errors to ignore') do |i|
|
31
|
+
ignore_file_path = i
|
32
|
+
end
|
33
|
+
|
28
34
|
parser.on('-l', '--language=LANGUAGE', 'override LANGUAGE of content') do |l|
|
29
35
|
force_language = l
|
30
36
|
end
|
@@ -49,6 +55,7 @@ begin
|
|
49
55
|
# TODO: --recursive, defaults to false
|
50
56
|
# TODO wget has some additional options for recursive behavior that should be reviewed
|
51
57
|
end.parse!
|
58
|
+
# rubocop:enable Metrics/BlockLength
|
52
59
|
rescue StandardError
|
53
60
|
warn "Error: #{$ERROR_INFO}"
|
54
61
|
exit 1
|
@@ -59,18 +66,38 @@ if ARGV.size != 1
|
|
59
66
|
exit 1
|
60
67
|
end
|
61
68
|
|
62
|
-
|
63
|
-
|
69
|
+
# rubocop:disable Metrics/ParameterLists
|
70
|
+
def check(url, doc, lang, personal_dictionary_path, ignore_file_path, verbose)
|
71
|
+
has_unknown_words = false
|
72
|
+
|
73
|
+
# Handle elements with a different lang attribute separately
|
74
|
+
doc.css(%([lang]:not([lang="#{lang}"]))).each do |element|
|
75
|
+
has_unknown_words |= check("#{url} => #{element.name} with", element, element['lang'], personal_dictionary_path, ignore_file_path, verbose)
|
76
|
+
element.unlink
|
77
|
+
end
|
78
|
+
|
79
|
+
unknown_words = HttpSpell::SpellChecker.new(personal_dictionary_path, verbose:).check(doc.to_s, lang)
|
80
|
+
|
81
|
+
if ignore_file_path && unknown_words.any?
|
82
|
+
ignore_words = File.read(ignore_file_path).lines.map(&:chomp)
|
83
|
+
ignored_words = unknown_words.intersection(ignore_words)
|
84
|
+
|
85
|
+
if ignored_words.any?
|
86
|
+
warn "#{url} (lang=#{lang}): Ignoring the following spelling errors because they are in the ignore list: #{ignored_words}" if verbose
|
87
|
+
unknown_words -= ignore_words
|
88
|
+
end
|
89
|
+
end
|
64
90
|
|
65
91
|
if unknown_words.empty?
|
66
92
|
warn "#{url} (lang=#{lang}): No unknown words" if verbose
|
67
|
-
|
93
|
+
has_unknown_words # no unknown words in doc, but maybe in elements with a different language
|
68
94
|
else
|
69
95
|
warn "#{url} (lang=#{lang}): #{unknown_words.size} unknown words:" if verbose
|
70
96
|
puts unknown_words
|
71
|
-
true
|
97
|
+
true # regardless of what elements with a different language had, at least doc has unknown words
|
72
98
|
end
|
73
99
|
end
|
100
|
+
# rubocop:enable Metrics/ParameterLists
|
74
101
|
|
75
102
|
has_unknown_words = false
|
76
103
|
|
@@ -83,14 +110,7 @@ spider_success = HttpSpell::Spider.new(ARGV.first, included:, excluded:, verbose
|
|
83
110
|
doc.css('iframe').each(&:unlink)
|
84
111
|
doc.css('[spellcheck=false]').each(&:unlink)
|
85
112
|
|
86
|
-
#
|
87
|
-
doc.css(%([lang]:not([lang="#{lang}"]))).each do |element|
|
88
|
-
has_unknown_words |= check("#{url} => #{element.name} with", element.to_s, element['lang'], personal_dictionary_path, verbose)
|
89
|
-
element.unlink
|
90
|
-
end
|
91
|
-
|
92
|
-
# Everything else
|
93
|
-
has_unknown_words |= check("#{url} => document with", doc.to_s, lang, personal_dictionary_path, verbose)
|
113
|
+
has_unknown_words |= check("#{url} => document with", doc, lang, personal_dictionary_path, ignore_file_path, verbose)
|
94
114
|
end
|
95
115
|
|
96
116
|
exit 2 unless spider_success
|
data/lib/http_spell/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: httpspell
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.0
|
4
|
+
version: 1.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steffen Uhlig
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-06-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|