httpspell 1.5.0 → 1.5.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d66cfcc88c0bc7e0e237033b8c76f1aaccc40f9aba3f68766d45204a2b133401
4
- data.tar.gz: 6c488170f95d0f33fdcbc5c55f2416d654f2b4558214943f21d19e1220f2ad96
3
+ metadata.gz: 509242695286e955675a85e15957752f1ac19eba7a5ffda317f6e45fd41c6c01
4
+ data.tar.gz: 4537ecafb9c882a23024c00246b0c1a07359d5180b2ee052d68a25ea23a64f6f
5
5
  SHA512:
6
- metadata.gz: f493b3411cd162e4a714203b05277f26810d71a0c23e52d69c36297c19e03db7b97692c1e4928c8f8fb0b9bc9a59f04b9ae4113c5cdd309c1edcf2a493d68687
7
- data.tar.gz: ed44c8adf0dcd63330e8e7f837d9f515fd6131e58ae02808a01ed430ef42e5ee258a8c8d5a6fb9552c6b2c0dc460966453d3f1cda6e359739a18b6a01a25cdeb
6
+ metadata.gz: ddf6cb8856cf025e21956c49efe2d94c35204c273a086f60b6ae5e61c7bd56ec9fddda5ec8890f78c0ff106b03baba6ced6bfcf733f1e93622721ebf0b966a08
7
+ data.tar.gz: c217f2635966096b1ab86df7c52d6dd76145359d761b3cccc6829fde018760407e7207b7d6b89e3f709b8a966f82de93964656e64ad32474e4e8e3ad7cea43f8
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- httpspell (1.5.0)
4
+ httpspell (1.5.1)
5
5
  nokogiri
6
6
 
7
7
  GEM
data/README.markdown CHANGED
@@ -39,7 +39,7 @@ Words that are not in the dictionary for the given language (inferred from the `
39
39
 
40
40
  # Misc
41
41
 
42
- If you produce content with kramdown (e.g. using Jekyll), setting `spellcheck='false'` for an element is a simple as adding this line *after* the element (e.g. heading):
42
+ If you produce content with kramdown (e.g. using Jekyll), an [Inline Attribute List](https://kramdown.gettalong.org/syntax.html#inline-attribute-lists) can be used to set `spellcheck='false'` for an element by adding this line *after* the element (e.g. heading):
43
43
 
44
44
  ```
45
45
  {: spellcheck="false"}
data/exe/httpspell CHANGED
@@ -7,6 +7,7 @@ require 'http_spell/spellchecker'
7
7
  require 'http_spell/version'
8
8
 
9
9
  personal_dictionary_path = nil
10
+ ignore_file_path = nil
10
11
  force_language = nil
11
12
  tracing = nil
12
13
  verbose = nil
@@ -14,6 +15,7 @@ included = nil
14
15
  excluded = []
15
16
 
16
17
  begin
18
+ # rubocop:disable Metrics/BlockLength
17
19
  OptionParser.new do |parser|
18
20
  parser.banner.prepend <<~BANNER
19
21
  Spellchecks a website via HTTP.
@@ -25,6 +27,10 @@ begin
25
27
  personal_dictionary_path = p
26
28
  end
27
29
 
30
+ parser.on('-I', '--ignore=FILE', 'path to a file containing spelling errors to ignore') do |i|
31
+ ignore_file_path = i
32
+ end
33
+
28
34
  parser.on('-l', '--language=LANGUAGE', 'override LANGUAGE of content') do |l|
29
35
  force_language = l
30
36
  end
@@ -49,6 +55,7 @@ begin
49
55
  # TODO: --recursive, defaults to false
50
56
  # TODO wget has some additional options for recursive behavior that should be reviewed
51
57
  end.parse!
58
+ # rubocop:enable Metrics/BlockLength
52
59
  rescue StandardError
53
60
  warn "Error: #{$ERROR_INFO}"
54
61
  exit 1
@@ -59,18 +66,38 @@ if ARGV.size != 1
59
66
  exit 1
60
67
  end
61
68
 
62
- def check(url, doc, lang, personal_dictionary_path, verbose)
63
- unknown_words = HttpSpell::SpellChecker.new(personal_dictionary_path, verbose:).check(doc, lang)
69
+ # rubocop:disable Metrics/ParameterLists
70
+ def check(url, doc, lang, personal_dictionary_path, ignore_file_path, verbose)
71
+ has_unknown_words = false
72
+
73
+ # Handle elements with a different lang attribute separately
74
+ doc.css(%([lang]:not([lang="#{lang}"]))).each do |element|
75
+ has_unknown_words |= check("#{url} => #{element.name} with", element, element['lang'], personal_dictionary_path, ignore_file_path, verbose)
76
+ element.unlink
77
+ end
78
+
79
+ unknown_words = HttpSpell::SpellChecker.new(personal_dictionary_path, verbose:).check(doc.to_s, lang)
80
+
81
+ if ignore_file_path && unknown_words.any?
82
+ ignore_words = File.read(ignore_file_path).lines.map(&:chomp)
83
+ ignored_words = unknown_words.intersection(ignore_words)
84
+
85
+ if ignored_words.any?
86
+ warn "#{url} (lang=#{lang}): Ignoring the following spelling errors because they are in the ignore list: #{ignored_words}" if verbose
87
+ unknown_words -= ignore_words
88
+ end
89
+ end
64
90
 
65
91
  if unknown_words.empty?
66
92
  warn "#{url} (lang=#{lang}): No unknown words" if verbose
67
- false
93
+ has_unknown_words # no unknown words in doc, but maybe in elements with a different language
68
94
  else
69
95
  warn "#{url} (lang=#{lang}): #{unknown_words.size} unknown words:" if verbose
70
96
  puts unknown_words
71
- true
97
+ true # regardless of what elements with a different language had, at least doc has unknown words
72
98
  end
73
99
  end
100
+ # rubocop:enable Metrics/ParameterLists
74
101
 
75
102
  has_unknown_words = false
76
103
 
@@ -83,14 +110,7 @@ spider_success = HttpSpell::Spider.new(ARGV.first, included:, excluded:, verbose
83
110
  doc.css('iframe').each(&:unlink)
84
111
  doc.css('[spellcheck=false]').each(&:unlink)
85
112
 
86
- # Handle elements with a different lang attribute separately
87
- doc.css(%([lang]:not([lang="#{lang}"]))).each do |element|
88
- has_unknown_words |= check("#{url} => #{element.name} with", element.to_s, element['lang'], personal_dictionary_path, verbose)
89
- element.unlink
90
- end
91
-
92
- # Everything else
93
- has_unknown_words |= check("#{url} => document with", doc.to_s, lang, personal_dictionary_path, verbose)
113
+ has_unknown_words |= check("#{url} => document with", doc, lang, personal_dictionary_path, ignore_file_path, verbose)
94
114
  end
95
115
 
96
116
  exit 2 unless spider_success
data/lib/http_spell/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HttpSpell
4
- VERSION = '1.5.0'
4
+ VERSION = '1.5.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: httpspell
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 1.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steffen Uhlig
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-05-31 00:00:00.000000000 Z
11
+ date: 2024-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri