httpspell 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ec39e7efff90e1b266f19714e6b6ce91a1a714e149be0df491e0e04cf5bef564
4
- data.tar.gz: 58be9917fc0e13ad4653e6687b19b018d30be56b0c0c9d62da8e39ef169264e6
3
+ metadata.gz: dc09324c003c7b14e08fa255b7a31c0a9aeb143df033da9aea300619a47268ba
4
+ data.tar.gz: 6890352a3cef38e243e2506398d58736c8179c2e0443a2b6ff341165e724dba0
5
5
  SHA512:
6
- metadata.gz: 045cc45fc3bd73e5cb4694fec14dec5afad13ab8402cdc90173b621f21a8a11c0b02a375404e0dc332c7e6ec29a571ce3e6d5b152283ad6bb9323161f583579d
7
- data.tar.gz: de4f50971f4d12a29c829b47a14f54d84c005c9ff7d421fcfcd672fdf065eb7db5ad4c27a83933e4d9de4bf4417d3cfbdf881a312416a0c6b0a95018637592f7
6
+ metadata.gz: 826bb8e875b2f1584dd5c052ab9777e616e1da0d6844263589b027c3eabfb07955155e0c43b8b1b8dc253d720eba952e80330c38035fff53fc1943420dea7454
7
+ data.tar.gz: 7a4e3c9aaa586d4fbdc41971424cd5f064793ff18cba8d8606a452b3cee36070af44aa2f78ab307c71a613404cc1e490af1f56eca11068675183625f5360790e
data/.github/dependabot.yml ADDED
@@ -0,0 +1,14 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: bundler
4
+ directory: "/"
5
+ schedule:
6
+ interval: daily
7
+ time: "09:00"
8
+ open-pull-requests-limit: 10
9
+ ignore:
10
+ - dependency-name: rubocop
11
+ versions:
12
+ - 1.10.0
13
+ - 1.11.0
14
+ - 1.9.0
data/.gitignore CHANGED
@@ -1 +1,3 @@
1
+ .rake_tasks
1
2
  pkg
3
+ spec/dictionaries
data/.mergify.yml ADDED
@@ -0,0 +1,8 @@
1
+ pull_request_rules:
2
+ - name: automatic merge for Dependabot pull requests
3
+ conditions:
4
+ - author~=^dependabot(|-preview)\[bot\]$
5
+ - check-success=Travis CI - Pull Request
6
+ actions:
7
+ merge:
8
+ method: merge
data/.rubocop.yml CHANGED
@@ -1,6 +1,11 @@
1
+ require:
2
+ - rubocop-rake
3
+ - rubocop-rspec
1
4
  AllCops:
2
- TargetRubyVersion: 2.5.1
5
+ NewCops: enable
6
+ TargetRubyVersion: 3.3
3
7
  Include:
8
+ - '**/*.rb'
4
9
  - '**/Gemfile'
5
10
  - '**/Rakefile'
6
11
  - '**/config.ru'
@@ -8,23 +13,33 @@ AllCops:
8
13
  Exclude:
9
14
  - vendor/**/*
10
15
  - db/migrations/**/*
11
-
12
16
  DisplayCopNames:
13
17
  Enabled: true
14
-
15
18
  DisplayStyleGuide:
16
19
  Enabled: true
17
-
18
20
  Naming/FileName:
19
21
  Exclude:
20
- - Guardfile
21
-
22
+ - Guardfile
22
23
  Metrics/BlockLength:
23
24
  Exclude:
24
25
  - spec/**/*
25
-
26
- Metrics/LineLength:
26
+ Layout/LineLength:
27
27
  Max: 160
28
-
29
28
  Style/Documentation:
30
29
  Enabled: false
30
+ Metrics/AbcSize:
31
+ Enabled: false
32
+ Metrics/MethodLength:
33
+ Enabled: false
34
+ Metrics/CyclomaticComplexity:
35
+ Enabled: false
36
+ Style/TrailingCommaInArrayLiteral:
37
+ Enabled: false
38
+ RSpec/ExampleWording:
39
+ Enabled: false
40
+ RSpec/InstanceVariable:
41
+ AssignmentOnly: true
42
+ RSpec/ExampleLength:
43
+ Max: 10
44
+ Metrics/PerceivedComplexity:
45
+ Max: 16
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-3.3.1
data/Gemfile CHANGED
@@ -2,3 +2,20 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
  gemspec
5
+
6
+ group :development do
7
+ gem 'aruba'
8
+ gem 'bundler'
9
+ gem 'guard'
10
+ gem 'guard-bundler'
11
+ gem 'guard-rspec'
12
+ gem 'httpx'
13
+ gem 'pry'
14
+ gem 'pry-byebug'
15
+ gem 'rake'
16
+ gem 'rspec'
17
+ gem 'rubocop'
18
+ gem 'rubocop-rake'
19
+ gem 'rubocop-rspec'
20
+ gem 'stub_server'
21
+ end
data/Gemfile.lock CHANGED
@@ -1,61 +1,66 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- httpspell (1.3.0)
4
+ httpspell (1.4.1)
5
5
  addressable
6
6
  nokogiri
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
- addressable (2.6.0)
12
- public_suffix (>= 2.0.2, < 4.0)
13
- aruba (0.14.9)
14
- childprocess (>= 0.6.3, < 1.1.0)
15
- contracts (~> 0.9)
16
- cucumber (>= 1.3.19)
17
- ffi (~> 1.9)
18
- rspec-expectations (>= 2.99)
19
- thor (~> 0.19)
20
- ast (2.4.0)
21
- backports (3.13.0)
22
- builder (3.2.3)
23
- byebug (11.0.1)
24
- childprocess (1.0.1)
25
- rake (< 13.0)
26
- coderay (1.1.2)
27
- contracts (0.16.0)
28
- cucumber (3.1.2)
29
- builder (>= 2.1.2)
30
- cucumber-core (~> 3.2.0)
31
- cucumber-expressions (~> 6.0.1)
32
- cucumber-wire (~> 0.0.1)
33
- diff-lcs (~> 1.3)
34
- gherkin (~> 5.1.0)
35
- multi_json (>= 1.7.5, < 2.0)
36
- multi_test (>= 0.1.2)
37
- cucumber-core (3.2.1)
38
- backports (>= 3.8.0)
39
- cucumber-tag_expressions (~> 1.1.0)
40
- gherkin (~> 5.0)
41
- cucumber-expressions (6.0.1)
42
- cucumber-tag_expressions (1.1.1)
43
- cucumber-wire (0.0.1)
44
- diff-lcs (1.3)
45
- ffi (1.10.0)
46
- formatador (0.2.5)
47
- gherkin (5.1.0)
48
- guard (2.15.0)
11
+ addressable (2.8.6)
12
+ public_suffix (>= 2.0.2, < 6.0)
13
+ aruba (2.2.0)
14
+ bundler (>= 1.17, < 3.0)
15
+ contracts (>= 0.16.0, < 0.18.0)
16
+ cucumber (>= 8.0, < 10.0)
17
+ rspec-expectations (~> 3.4)
18
+ thor (~> 1.0)
19
+ ast (2.4.2)
20
+ bigdecimal (3.1.8)
21
+ builder (3.2.4)
22
+ byebug (11.1.3)
23
+ coderay (1.1.3)
24
+ contracts (0.17)
25
+ cucumber (9.2.0)
26
+ builder (~> 3.2)
27
+ cucumber-ci-environment (> 9, < 11)
28
+ cucumber-core (> 13, < 14)
29
+ cucumber-cucumber-expressions (~> 17.0)
30
+ cucumber-gherkin (> 24, < 28)
31
+ cucumber-html-formatter (> 20.3, < 22)
32
+ cucumber-messages (> 19, < 25)
33
+ diff-lcs (~> 1.5)
34
+ mini_mime (~> 1.1)
35
+ multi_test (~> 1.1)
36
+ sys-uname (~> 1.2)
37
+ cucumber-ci-environment (10.0.1)
38
+ cucumber-core (13.0.2)
39
+ cucumber-gherkin (>= 27, < 28)
40
+ cucumber-messages (>= 20, < 23)
41
+ cucumber-tag-expressions (> 5, < 7)
42
+ cucumber-cucumber-expressions (17.1.0)
43
+ bigdecimal
44
+ cucumber-gherkin (27.0.0)
45
+ cucumber-messages (>= 19.1.4, < 23)
46
+ cucumber-html-formatter (21.3.1)
47
+ cucumber-messages (> 19, < 25)
48
+ cucumber-messages (22.0.0)
49
+ cucumber-tag-expressions (6.1.0)
50
+ diff-lcs (1.5.1)
51
+ ffi (1.16.3)
52
+ formatador (1.1.0)
53
+ guard (2.18.1)
49
54
  formatador (>= 0.2.4)
50
55
  listen (>= 2.7, < 4.0)
51
56
  lumberjack (>= 1.0.12, < 2.0)
52
57
  nenv (~> 0.1)
53
58
  notiffany (~> 0.0)
54
- pry (>= 0.9.12)
59
+ pry (>= 0.13.0)
55
60
  shellany (~> 0.0)
56
61
  thor (>= 0.18.1)
57
- guard-bundler (2.2.1)
58
- bundler (>= 1.3.0, < 3)
62
+ guard-bundler (3.0.1)
63
+ bundler (>= 2.1, < 3)
59
64
  guard (~> 2.2)
60
65
  guard-compat (~> 1.1)
61
66
  guard-compat (1.2.1)
@@ -63,70 +68,108 @@ GEM
63
68
  guard (~> 2.1)
64
69
  guard-compat (~> 1.1)
65
70
  rspec (>= 2.99.0, < 4.0)
66
- jaro_winkler (1.5.2)
67
- listen (3.1.5)
68
- rb-fsevent (~> 0.9, >= 0.9.4)
69
- rb-inotify (~> 0.9, >= 0.9.7)
70
- ruby_dep (~> 1.2)
71
- lumberjack (1.0.13)
72
- method_source (0.9.2)
73
- mini_portile2 (2.4.0)
74
- multi_json (1.13.1)
75
- multi_test (0.1.2)
71
+ http-2-next (1.0.3)
72
+ httpx (1.2.5)
73
+ http-2-next (>= 1.0.3)
74
+ json (2.7.2)
75
+ language_server-protocol (3.17.0.3)
76
+ listen (3.9.0)
77
+ rb-fsevent (~> 0.10, >= 0.10.3)
78
+ rb-inotify (~> 0.9, >= 0.9.10)
79
+ lumberjack (1.2.10)
80
+ method_source (1.1.0)
81
+ mini_mime (1.1.5)
82
+ multi_test (1.1.0)
76
83
  nenv (0.3.0)
77
- nokogiri (1.10.2)
78
- mini_portile2 (~> 2.4.0)
79
- notiffany (0.1.1)
84
+ nokogiri (1.16.5-arm64-darwin)
85
+ racc (~> 1.4)
86
+ nokogiri (1.16.5-x86_64-darwin)
87
+ racc (~> 1.4)
88
+ nokogiri (1.16.5-x86_64-linux)
89
+ racc (~> 1.4)
90
+ notiffany (0.1.3)
80
91
  nenv (~> 0.1)
81
92
  shellany (~> 0.0)
82
- parallel (1.17.0)
83
- parser (2.6.2.1)
84
- ast (~> 2.4.0)
85
- pry (0.12.2)
86
- coderay (~> 1.1.0)
87
- method_source (~> 0.9.0)
88
- pry-byebug (3.7.0)
93
+ parallel (1.24.0)
94
+ parser (3.3.1.0)
95
+ ast (~> 2.4.1)
96
+ racc
97
+ pry (0.14.2)
98
+ coderay (~> 1.1)
99
+ method_source (~> 1.0)
100
+ pry-byebug (3.10.1)
89
101
  byebug (~> 11.0)
90
- pry (~> 0.10)
91
- psych (3.1.0)
92
- public_suffix (3.0.3)
93
- rack (2.0.7)
94
- rainbow (3.0.0)
95
- rake (12.3.2)
96
- rb-fsevent (0.10.3)
97
- rb-inotify (0.10.0)
102
+ pry (>= 0.13, < 0.15)
103
+ public_suffix (5.0.5)
104
+ racc (1.8.0)
105
+ rack (3.0.11)
106
+ rackup (0.2.3)
107
+ rack (>= 3.0.0.beta1)
108
+ webrick
109
+ rainbow (3.1.1)
110
+ rake (13.2.1)
111
+ rb-fsevent (0.11.2)
112
+ rb-inotify (0.11.1)
98
113
  ffi (~> 1.0)
99
- rspec (3.8.0)
100
- rspec-core (~> 3.8.0)
101
- rspec-expectations (~> 3.8.0)
102
- rspec-mocks (~> 3.8.0)
103
- rspec-core (3.8.0)
104
- rspec-support (~> 3.8.0)
105
- rspec-expectations (3.8.2)
114
+ regexp_parser (2.9.2)
115
+ rexml (3.2.8)
116
+ strscan (>= 3.0.9)
117
+ rspec (3.13.0)
118
+ rspec-core (~> 3.13.0)
119
+ rspec-expectations (~> 3.13.0)
120
+ rspec-mocks (~> 3.13.0)
121
+ rspec-core (3.13.0)
122
+ rspec-support (~> 3.13.0)
123
+ rspec-expectations (3.13.0)
106
124
  diff-lcs (>= 1.2.0, < 2.0)
107
- rspec-support (~> 3.8.0)
108
- rspec-mocks (3.8.0)
125
+ rspec-support (~> 3.13.0)
126
+ rspec-mocks (3.13.1)
109
127
  diff-lcs (>= 1.2.0, < 2.0)
110
- rspec-support (~> 3.8.0)
111
- rspec-support (3.8.0)
112
- rubocop (0.67.2)
113
- jaro_winkler (~> 1.5.1)
128
+ rspec-support (~> 3.13.0)
129
+ rspec-support (3.13.1)
130
+ rubocop (1.64.0)
131
+ json (~> 2.3)
132
+ language_server-protocol (>= 3.17.0)
114
133
  parallel (~> 1.10)
115
- parser (>= 2.5, != 2.5.1.1)
116
- psych (>= 3.1.0)
134
+ parser (>= 3.3.0.2)
117
135
  rainbow (>= 2.2.2, < 4.0)
136
+ regexp_parser (>= 1.8, < 3.0)
137
+ rexml (>= 3.2.5, < 4.0)
138
+ rubocop-ast (>= 1.31.1, < 2.0)
118
139
  ruby-progressbar (~> 1.7)
119
- unicode-display_width (>= 1.4.0, < 1.6)
120
- ruby-progressbar (1.10.0)
121
- ruby_dep (1.5.0)
140
+ unicode-display_width (>= 2.4.0, < 3.0)
141
+ rubocop-ast (1.31.3)
142
+ parser (>= 3.3.1.0)
143
+ rubocop-capybara (2.20.0)
144
+ rubocop (~> 1.41)
145
+ rubocop-factory_bot (2.25.1)
146
+ rubocop (~> 1.41)
147
+ rubocop-rake (0.6.0)
148
+ rubocop (~> 1.0)
149
+ rubocop-rspec (2.29.2)
150
+ rubocop (~> 1.40)
151
+ rubocop-capybara (~> 2.17)
152
+ rubocop-factory_bot (~> 2.22)
153
+ rubocop-rspec_rails (~> 2.28)
154
+ rubocop-rspec_rails (2.28.3)
155
+ rubocop (~> 1.40)
156
+ ruby-progressbar (1.13.0)
122
157
  shellany (0.0.1)
123
- stub_server (0.4.0)
124
- rack
125
- thor (0.20.3)
126
- unicode-display_width (1.5.0)
158
+ strscan (3.1.0)
159
+ stub_server (0.7.0)
160
+ rackup (~> 0.2.2)
161
+ webrick
162
+ sys-uname (1.2.3)
163
+ ffi (~> 1.1)
164
+ thor (1.3.1)
165
+ unicode-display_width (2.5.0)
166
+ webrick (1.8.1)
127
167
 
128
168
  PLATFORMS
129
- ruby
169
+ arm64-darwin-22
170
+ arm64-darwin-23
171
+ x86_64-darwin-21
172
+ x86_64-linux
130
173
 
131
174
  DEPENDENCIES
132
175
  aruba
@@ -135,12 +178,15 @@ DEPENDENCIES
135
178
  guard-bundler
136
179
  guard-rspec
137
180
  httpspell!
181
+ httpx
138
182
  pry
139
183
  pry-byebug
140
184
  rake
141
185
  rspec
142
186
  rubocop
187
+ rubocop-rake
188
+ rubocop-rspec
143
189
  stub_server
144
190
 
145
191
  BUNDLED WITH
146
- 1.17.2
192
+ 2.5.9
data/Guardfile ADDED
@@ -0,0 +1,25 @@
1
+ guard :bundler do
2
+ require 'guard/bundler'
3
+ require 'guard/bundler/verify'
4
+ helper = Guard::Bundler::Verify.new
5
+
6
+ files = ['Gemfile']
7
+ files += Dir['*.gemspec'] if files.any? { |f| helper.uses_gemspec?(f) }
8
+
9
+ # Assume files are symlinked from somewhere
10
+ files.each { |file| watch(helper.real_path(file)) }
11
+ end
12
+
13
+ guard :rspec, cmd: "bundle exec rspec" do
14
+ require "guard/rspec/dsl"
15
+ dsl = Guard::RSpec::Dsl.new(self)
16
+
17
+ rspec = dsl.rspec
18
+ watch(rspec.spec_helper) { rspec.spec_dir }
19
+ watch(rspec.spec_support) { rspec.spec_dir }
20
+ watch(rspec.spec_files)
21
+
22
+ # Ruby files
23
+ ruby = dsl.ruby
24
+ dsl.watch_spec_files_for(ruby.lib_files)
25
+ end
data/README.markdown CHANGED
@@ -1,7 +1,5 @@
1
1
  # `httpspell`
2
2
 
3
- [![Build Status](https://travis-ci.org/suhlig/httpspell.svg?branch=master)](https://travis-ci.org/suhlig/httpspell)
4
-
5
3
  This is a spellchecker that recursively fetches HTML pages, converts them to plain text (using [pandoc](http://pandoc.org/)), and spellchecks them with [hunspell](https://hunspell.github.io/). Unknown words will be printed to `stdout`, which makes the tool a good candidate for CI pipelines where you might want to take action when a spelling error is found on a web page.
6
4
 
7
5
  Words that are not in the dictionary for the given language (inferred from the `lang` attribute of the HTML document's root element) can be added to a personal dictionary, which will mark the word as correctly spelled.
@@ -46,3 +44,26 @@ If you produce content with kramdown (e.g. using Jekyll), setting `spellcheck='f
46
44
  ```
47
45
  {: spellcheck="false"}
48
46
  ```
47
+
48
+ # Dictionaries
49
+
50
+ Hunspell uses the system dictionary paths; on the Mac this is `~/Library/Spelling/`. Get some dictionaries as explained in the [hunspell](https://github.com/hunspell/hunspell) project:
51
+
52
+ ```command
53
+ $ wget -O ~/Library/Spelling/en_US.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/en/en_US.aff
54
+ $ wget -O ~/Library/Spelling/en_US.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/en/en_US.dic
55
+ ```
56
+
57
+ German:
58
+
59
+ ```command
60
+ $ wget -O ~/Library/Spelling/de_DE.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/de/de_DE_frami.dic
61
+ $ wget -O ~/Library/Spelling/de_DE.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/de/de_DE_frami.aff
62
+ ```
63
+
64
+ Italian (for integration tests):
65
+
66
+ ```command
67
+ $ wget -O ~/Library/Spelling/it_IT.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/it_IT/it_IT.dic
68
+ $ wget -O ~/Library/Spelling/it_IT.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/it_IT/it_IT.aff
69
+ ```
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ task default: ['spec:all']
10
10
 
11
11
  namespace :spec do
12
12
  desc 'Run all specs'
13
- task all: %i[rubocop:auto_correct unit system]
13
+ task all: %i[rubocop:autocorrect unit system]
14
14
 
15
15
  %w[unit system].each do |type|
16
16
  desc "Run #{type} tests"
data/TODO.markdown CHANGED
@@ -1,6 +1,4 @@
1
1
  * Bail out if lang cannot be inferred and is not given on cmdline
2
2
  * exe/httpspell: # TODO: --recursive, defaults to false
3
3
  * exe/httpspell: # TODO wget has some additional options for recursive behavior that should be reviewed
4
- * exe/httpspell: # TODO: Find sections with a lang attribute and handle them separately
5
4
  * lib/httpspell/spider.rb: # TODO Print _which_ entry of the blacklist matches
6
- * lib/httpspell/spider.rb: # TODO Ignore same page links (some anchor)
data/exe/httpspell CHANGED
@@ -2,9 +2,9 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  require 'optparse'
5
- require 'httpspell/spider'
6
- require 'httpspell/spellchecker'
7
- require 'httpspell/version'
5
+ require 'http_spell/spider'
6
+ require 'http_spell/spellchecker'
7
+ require 'http_spell/version'
8
8
 
9
9
  personal_dictionary_path = nil
10
10
  force_language = nil
@@ -59,27 +59,38 @@ if ARGV.size != 1
59
59
  exit 1
60
60
  end
61
61
 
62
- spell_checker = HttpSpell::SpellChecker.new(personal_dictionary_path, tracing: tracing)
62
+ def check(doc, lang, personal_dictionary_path, verbose)
63
+ unknown_words = HttpSpell::SpellChecker.new(personal_dictionary_path, verbose:).check(doc, lang)
64
+
65
+ if unknown_words.empty?
66
+ warn 'No unknown words.' if verbose
67
+ else
68
+ warn "#{unknown_words.size} unknown words:" if verbose
69
+ puts unknown_words
70
+ true
71
+ end
72
+ end
73
+
63
74
  has_unknown_words = false
64
75
 
65
- spider_success = HttpSpell::Spider.new(ARGV.first, whitelist: whitelist, blacklist: blacklist, tracing: tracing).start do |url, doc|
66
- lang = force_language || doc.root['lang'] || ENV['LANGUAGE']
76
+ spider_success = HttpSpell::Spider.new(ARGV.first, whitelist:, blacklist:, verbose:, tracing:).start do |url, doc|
77
+ lang = force_language || doc.root['lang'] || ENV.fetch('LANGUAGE', nil)
78
+ warn "Checking #{url} as #{lang}" if verbose
67
79
 
68
- # Remove sections that are not to be spellchecked
80
+ # Remove elements that are not to be spellchecked
69
81
  doc.css('pre').each(&:unlink)
70
82
  doc.css('code').each(&:unlink)
71
83
  doc.css('[spellcheck=false]').each(&:unlink)
72
84
 
73
- # TODO: Find sections with a lang attribute and handle them separately
74
- unknown_words = spell_checker.check(doc.to_s, lang)
75
-
76
- if unknown_words.empty?
77
- warn "No unknown words (language is #{lang}) at #{url}." if verbose
78
- else
79
- warn "#{unknown_words.size} unknown words (language is #{lang}) at #{url}:" if verbose
80
- puts unknown_words
81
- has_unknown_words = true
85
+ # Handle elements with a different lang attribute separately
86
+ doc.css(%([lang]:not([lang="#{lang}"]))).each do |element|
87
+ warn "Handling #{element.name} with lang #{element['lang']}:" if verbose
88
+ has_unknown_words |= check(element.to_s, element['lang'], personal_dictionary_path, verbose)
89
+ element.unlink
82
90
  end
91
+
92
+ # Everything else
93
+ has_unknown_words |= check(doc.to_s, lang, personal_dictionary_path, verbose)
83
94
  end
84
95
 
85
96
  exit 2 unless spider_success
data/httpspell.gemspec CHANGED
@@ -2,9 +2,8 @@
2
2
 
3
3
  lib = File.expand_path('lib', __dir__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
- require 'httpspell/version'
5
+ require 'http_spell/version'
6
6
 
7
- # rubocop:disable Metrics/BlockLength
8
7
  Gem::Specification.new do |spec|
9
8
  spec.name = 'httpspell'
10
9
  spec.version = HttpSpell::VERSION
@@ -13,8 +12,8 @@ Gem::Specification.new do |spec|
13
12
 
14
13
  spec.summary = 'HTTP spellchecker'
15
14
  spec.description = %(httpspell is a spellchecker that recursively fetches
16
- HTML pages, converts them to plain text using pandoc, and
17
- spellchecks them with hunspell.)
15
+ HTML pages, converts them to plain text using pandoc, and
16
+ spellchecks them with hunspell.)
18
17
  spec.license = 'MIT'
19
18
 
20
19
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
@@ -26,17 +25,5 @@ Gem::Specification.new do |spec|
26
25
 
27
26
  spec.add_dependency 'addressable'
28
27
  spec.add_dependency 'nokogiri'
29
-
30
- spec.add_development_dependency 'aruba'
31
- spec.add_development_dependency 'bundler'
32
- spec.add_development_dependency 'guard'
33
- spec.add_development_dependency 'guard-bundler'
34
- spec.add_development_dependency 'guard-rspec'
35
- spec.add_development_dependency 'pry'
36
- spec.add_development_dependency 'pry-byebug'
37
- spec.add_development_dependency 'rake'
38
- spec.add_development_dependency 'rspec'
39
- spec.add_development_dependency 'rubocop'
40
- spec.add_development_dependency 'stub_server'
28
+ spec.metadata['rubygems_mfa_required'] = 'true'
41
29
  end
42
- # rubocop:enable Metrics/BlockLength
data/lib/{httpspell → http_spell}/spellchecker.rb RENAMED
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module HttpSpell
2
4
  class SpellChecker
3
- def initialize(personal_dictionary_path = nil, tracing: false)
5
+ def initialize(personal_dictionary_path = nil, verbose: false)
4
6
  @personal_dictionary_arg = "-p #{personal_dictionary_path}" if personal_dictionary_path
5
- @tracing = tracing
7
+ @verbose = verbose
6
8
  end
7
9
 
8
10
  def check(doc, lang)
@@ -11,8 +13,8 @@ module HttpSpell
11
13
  "hunspell -d #{translate(lang)} #{@personal_dictionary_arg} -i UTF-8 -l",
12
14
  ]
13
15
 
14
- if @tracing
15
- warn "Piping the HTML document into the following chain of commands:"
16
+ if @verbose
17
+ warn 'Piping the HTML document into the following chain of commands:'
16
18
  warn commands
17
19
  end
18
20
 
data/lib/{httpspell → http_spell}/spider.rb RENAMED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'nokogiri'
2
4
  require 'open-uri'
3
5
  require 'open3'
@@ -8,12 +10,13 @@ module HttpSpell
8
10
  class Spider
9
11
  attr_reader :todo, :done
10
12
 
11
- def initialize(starting_point, whitelist: nil, blacklist: [], tracing: false)
13
+ def initialize(starting_point, whitelist: nil, blacklist: [], verbose: false, tracing: false)
12
14
  @todo = []
13
15
  @done = []
14
16
  todo << Addressable::URI.parse(starting_point)
15
17
  @whitelist = whitelist || [/^#{starting_point}/]
16
18
  @blacklist = blacklist
19
+ @verbose = verbose
17
20
  @tracing = tracing
18
21
  end
19
22
 
@@ -26,13 +29,13 @@ module HttpSpell
26
29
  begin
27
30
  extracted = links(url) do |u, d|
28
31
  yield u, d if block_given?
29
- rescue
32
+ rescue StandardError
30
33
  warn "Callback error for #{url}: #{$ERROR_INFO}"
31
34
  warn $ERROR_INFO.backtrace if @tracing
32
35
  end
33
36
 
34
37
  done.append(url)
35
- todo.concat(extracted - done - todo)
38
+ todo.concat(extracted - done - todo).uniq!
36
39
  rescue StandardError
37
40
  warn "Skipping #{url} because of #{$ERROR_INFO.message}"
38
41
  warn $ERROR_INFO.backtrace if @tracing
@@ -40,16 +43,16 @@ module HttpSpell
40
43
  end
41
44
  end
42
45
 
43
- return success
46
+ success
44
47
  end
45
48
 
46
49
  private
47
50
 
48
51
  def links(uri)
49
- response = http_get(URI(uri))
52
+ response = http_get(uri)
50
53
 
51
- if response.content_type != 'text/html'
52
- warn "Skipping #{uri} because it is not HTML" if @tracing
54
+ if response.respond_to?(:content_type) && response.content_type != 'text/html'
55
+ warn "Skipping #{uri} because it is not HTML" if @verbose
53
56
  return []
54
57
  end
55
58
 
@@ -60,17 +63,19 @@ module HttpSpell
60
63
  link = uri.join(link) if link.relative?
61
64
 
62
65
  if @whitelist.none? { |re| re.match?(link.to_s) }
63
- warn "Skipping #{link} because it is not on the whitelist #{@whitelist}" if @tracing
66
+ warn "Skipping #{link} because it is not on the whitelist #{@whitelist}" if @verbose
64
67
  next
65
68
  end
66
69
 
67
70
  if @blacklist.any? { |re| re.match?(link.to_s) }
68
- # TODO Print _which_ entry of the blacklist matches
69
- warn "Skipping #{link} because it is on the blacklist #{@blacklist}" if @tracing
71
+ # TODO: Print _which_ entry of the blacklist matches
72
+ warn "Skipping #{link} because it is on the blacklist #{@blacklist}" if @verbose
70
73
  next
71
74
  end
72
75
 
73
- # TODO Ignore same page links (some anchor)
76
+ # Ignore fragment; we always check the whole page
77
+ link.fragment = nil
78
+
74
79
  link
75
80
  rescue StandardError
76
81
  warn $ERROR_INFO.message
@@ -79,7 +84,7 @@ module HttpSpell
79
84
 
80
85
  yield uri, doc if block_given?
81
86
 
82
- warn "Adding #{links.size} links from #{uri}" if @tracing
87
+ warn "Adding #{links.size} links from #{uri}" if @verbose
83
88
  links
84
89
  end
85
90
 
@@ -88,10 +93,10 @@ module HttpSpell
88
93
  tries = 10
89
94
 
90
95
  begin
91
- uri.open(redirect: false)
92
- rescue OpenURI::HTTPRedirect => redirect
93
- uri = redirect.uri
94
- retry if (tries -= 1) > 0
96
+ URI.parse(uri).open(redirect: false)
97
+ rescue OpenURI::HTTPRedirect => e
98
+ uri = e.uri
99
+ retry if (tries -= 1).positive?
95
100
  raise
96
101
  end
97
102
  end
data/lib/{httpspell → http_spell}/version.rb RENAMED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HttpSpell
4
- VERSION = '1.3.0'
4
+ VERSION = '1.4.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: httpspell
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steffen Uhlig
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-12 00:00:00.000000000 Z
11
+ date: 2024-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -38,164 +38,10 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: aruba
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: bundler
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
69
- - !ruby/object:Gem::Dependency
70
- name: guard
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: guard-bundler
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: guard-rspec
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: pry
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - ">="
116
- - !ruby/object:Gem::Version
117
- version: '0'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - ">="
123
- - !ruby/object:Gem::Version
124
- version: '0'
125
- - !ruby/object:Gem::Dependency
126
- name: pry-byebug
127
- requirement: !ruby/object:Gem::Requirement
128
- requirements:
129
- - - ">="
130
- - !ruby/object:Gem::Version
131
- version: '0'
132
- type: :development
133
- prerelease: false
134
- version_requirements: !ruby/object:Gem::Requirement
135
- requirements:
136
- - - ">="
137
- - !ruby/object:Gem::Version
138
- version: '0'
139
- - !ruby/object:Gem::Dependency
140
- name: rake
141
- requirement: !ruby/object:Gem::Requirement
142
- requirements:
143
- - - ">="
144
- - !ruby/object:Gem::Version
145
- version: '0'
146
- type: :development
147
- prerelease: false
148
- version_requirements: !ruby/object:Gem::Requirement
149
- requirements:
150
- - - ">="
151
- - !ruby/object:Gem::Version
152
- version: '0'
153
- - !ruby/object:Gem::Dependency
154
- name: rspec
155
- requirement: !ruby/object:Gem::Requirement
156
- requirements:
157
- - - ">="
158
- - !ruby/object:Gem::Version
159
- version: '0'
160
- type: :development
161
- prerelease: false
162
- version_requirements: !ruby/object:Gem::Requirement
163
- requirements:
164
- - - ">="
165
- - !ruby/object:Gem::Version
166
- version: '0'
167
- - !ruby/object:Gem::Dependency
168
- name: rubocop
169
- requirement: !ruby/object:Gem::Requirement
170
- requirements:
171
- - - ">="
172
- - !ruby/object:Gem::Version
173
- version: '0'
174
- type: :development
175
- prerelease: false
176
- version_requirements: !ruby/object:Gem::Requirement
177
- requirements:
178
- - - ">="
179
- - !ruby/object:Gem::Version
180
- version: '0'
181
- - !ruby/object:Gem::Dependency
182
- name: stub_server
183
- requirement: !ruby/object:Gem::Requirement
184
- requirements:
185
- - - ">="
186
- - !ruby/object:Gem::Version
187
- version: '0'
188
- type: :development
189
- prerelease: false
190
- version_requirements: !ruby/object:Gem::Requirement
191
- requirements:
192
- - - ">="
193
- - !ruby/object:Gem::Version
194
- version: '0'
195
41
  description: |-
196
42
  httpspell is a spellchecker that recursively fetches
197
- HTML pages, converts them to plain text using pandoc, and
198
- spellchecks them with hunspell.
43
+ HTML pages, converts them to plain text using pandoc, and
44
+ spellchecks them with hunspell.
199
45
  email:
200
46
  - steffen@familie-uhlig.net
201
47
  executables:
@@ -203,25 +49,29 @@ executables:
203
49
  extensions: []
204
50
  extra_rdoc_files: []
205
51
  files:
52
+ - ".github/dependabot.yml"
206
53
  - ".gitignore"
54
+ - ".mergify.yml"
207
55
  - ".rspec"
208
56
  - ".rubocop.yml"
209
- - ".travis.yml"
57
+ - ".ruby-version"
210
58
  - Gemfile
211
59
  - Gemfile.lock
60
+ - Guardfile
212
61
  - README.markdown
213
62
  - Rakefile
214
63
  - TODO.markdown
215
64
  - exe/httpspell
216
65
  - httpspell.gemspec
217
- - lib/httpspell/spellchecker.rb
218
- - lib/httpspell/spider.rb
219
- - lib/httpspell/version.rb
220
- homepage:
66
+ - lib/http_spell/spellchecker.rb
67
+ - lib/http_spell/spider.rb
68
+ - lib/http_spell/version.rb
69
+ homepage:
221
70
  licenses:
222
71
  - MIT
223
- metadata: {}
224
- post_install_message:
72
+ metadata:
73
+ rubygems_mfa_required: 'true'
74
+ post_install_message:
225
75
  rdoc_options: []
226
76
  require_paths:
227
77
  - lib
@@ -236,8 +86,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
236
86
  - !ruby/object:Gem::Version
237
87
  version: '0'
238
88
  requirements: []
239
- rubygems_version: 3.0.1
240
- signing_key:
89
+ rubygems_version: 3.5.9
90
+ signing_key:
241
91
  specification_version: 4
242
92
  summary: HTTP spellchecker
243
93
  test_files: []
data/.travis.yml DELETED
@@ -1,6 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - 2.5.1
4
- before_install:
5
- - sudo apt-get -qq update
6
- - sudo apt-get install -y pandoc hunspell hunspell-en-us