httpspell 1.3.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ec39e7efff90e1b266f19714e6b6ce91a1a714e149be0df491e0e04cf5bef564
4
- data.tar.gz: 58be9917fc0e13ad4653e6687b19b018d30be56b0c0c9d62da8e39ef169264e6
3
+ metadata.gz: dc09324c003c7b14e08fa255b7a31c0a9aeb143df033da9aea300619a47268ba
4
+ data.tar.gz: 6890352a3cef38e243e2506398d58736c8179c2e0443a2b6ff341165e724dba0
5
5
  SHA512:
6
- metadata.gz: 045cc45fc3bd73e5cb4694fec14dec5afad13ab8402cdc90173b621f21a8a11c0b02a375404e0dc332c7e6ec29a571ce3e6d5b152283ad6bb9323161f583579d
7
- data.tar.gz: de4f50971f4d12a29c829b47a14f54d84c005c9ff7d421fcfcd672fdf065eb7db5ad4c27a83933e4d9de4bf4417d3cfbdf881a312416a0c6b0a95018637592f7
6
+ metadata.gz: 826bb8e875b2f1584dd5c052ab9777e616e1da0d6844263589b027c3eabfb07955155e0c43b8b1b8dc253d720eba952e80330c38035fff53fc1943420dea7454
7
+ data.tar.gz: 7a4e3c9aaa586d4fbdc41971424cd5f064793ff18cba8d8606a452b3cee36070af44aa2f78ab307c71a613404cc1e490af1f56eca11068675183625f5360790e
data/.github/dependabot.yml ADDED
@@ -0,0 +1,14 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: bundler
4
+ directory: "/"
5
+ schedule:
6
+ interval: daily
7
+ time: "09:00"
8
+ open-pull-requests-limit: 10
9
+ ignore:
10
+ - dependency-name: rubocop
11
+ versions:
12
+ - 1.10.0
13
+ - 1.11.0
14
+ - 1.9.0
data/.gitignore CHANGED
@@ -1 +1,3 @@
1
+ .rake_tasks
1
2
  pkg
3
+ spec/dictionaries
data/.mergify.yml ADDED
@@ -0,0 +1,8 @@
1
+ pull_request_rules:
2
+ - name: automatic merge for Dependabot pull requests
3
+ conditions:
4
+ - author~=^dependabot(|-preview)\[bot\]$
5
+ - check-success=Travis CI - Pull Request
6
+ actions:
7
+ merge:
8
+ method: merge
data/.rubocop.yml CHANGED
@@ -1,6 +1,11 @@
1
+ require:
2
+ - rubocop-rake
3
+ - rubocop-rspec
1
4
  AllCops:
2
- TargetRubyVersion: 2.5.1
5
+ NewCops: enable
6
+ TargetRubyVersion: 3.3
3
7
  Include:
8
+ - '**/*.rb'
4
9
  - '**/Gemfile'
5
10
  - '**/Rakefile'
6
11
  - '**/config.ru'
@@ -8,23 +13,33 @@ AllCops:
8
13
  Exclude:
9
14
  - vendor/**/*
10
15
  - db/migrations/**/*
11
-
12
16
  DisplayCopNames:
13
17
  Enabled: true
14
-
15
18
  DisplayStyleGuide:
16
19
  Enabled: true
17
-
18
20
  Naming/FileName:
19
21
  Exclude:
20
- - Guardfile
21
-
22
+ - Guardfile
22
23
  Metrics/BlockLength:
23
24
  Exclude:
24
25
  - spec/**/*
25
-
26
- Metrics/LineLength:
26
+ Layout/LineLength:
27
27
  Max: 160
28
-
29
28
  Style/Documentation:
30
29
  Enabled: false
30
+ Metrics/AbcSize:
31
+ Enabled: false
32
+ Metrics/MethodLength:
33
+ Enabled: false
34
+ Metrics/CyclomaticComplexity:
35
+ Enabled: false
36
+ Style/TrailingCommaInArrayLiteral:
37
+ Enabled: false
38
+ RSpec/ExampleWording:
39
+ Enabled: false
40
+ RSpec/InstanceVariable:
41
+ AssignmentOnly: true
42
+ RSpec/ExampleLength:
43
+ Max: 10
44
+ Metrics/PerceivedComplexity:
45
+ Max: 16
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-3.3.1
data/Gemfile CHANGED
@@ -2,3 +2,20 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
  gemspec
5
+
6
+ group :development do
7
+ gem 'aruba'
8
+ gem 'bundler'
9
+ gem 'guard'
10
+ gem 'guard-bundler'
11
+ gem 'guard-rspec'
12
+ gem 'httpx'
13
+ gem 'pry'
14
+ gem 'pry-byebug'
15
+ gem 'rake'
16
+ gem 'rspec'
17
+ gem 'rubocop'
18
+ gem 'rubocop-rake'
19
+ gem 'rubocop-rspec'
20
+ gem 'stub_server'
21
+ end
data/Gemfile.lock CHANGED
@@ -1,61 +1,66 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- httpspell (1.3.0)
4
+ httpspell (1.4.1)
5
5
  addressable
6
6
  nokogiri
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
- addressable (2.6.0)
12
- public_suffix (>= 2.0.2, < 4.0)
13
- aruba (0.14.9)
14
- childprocess (>= 0.6.3, < 1.1.0)
15
- contracts (~> 0.9)
16
- cucumber (>= 1.3.19)
17
- ffi (~> 1.9)
18
- rspec-expectations (>= 2.99)
19
- thor (~> 0.19)
20
- ast (2.4.0)
21
- backports (3.13.0)
22
- builder (3.2.3)
23
- byebug (11.0.1)
24
- childprocess (1.0.1)
25
- rake (< 13.0)
26
- coderay (1.1.2)
27
- contracts (0.16.0)
28
- cucumber (3.1.2)
29
- builder (>= 2.1.2)
30
- cucumber-core (~> 3.2.0)
31
- cucumber-expressions (~> 6.0.1)
32
- cucumber-wire (~> 0.0.1)
33
- diff-lcs (~> 1.3)
34
- gherkin (~> 5.1.0)
35
- multi_json (>= 1.7.5, < 2.0)
36
- multi_test (>= 0.1.2)
37
- cucumber-core (3.2.1)
38
- backports (>= 3.8.0)
39
- cucumber-tag_expressions (~> 1.1.0)
40
- gherkin (~> 5.0)
41
- cucumber-expressions (6.0.1)
42
- cucumber-tag_expressions (1.1.1)
43
- cucumber-wire (0.0.1)
44
- diff-lcs (1.3)
45
- ffi (1.10.0)
46
- formatador (0.2.5)
47
- gherkin (5.1.0)
48
- guard (2.15.0)
11
+ addressable (2.8.6)
12
+ public_suffix (>= 2.0.2, < 6.0)
13
+ aruba (2.2.0)
14
+ bundler (>= 1.17, < 3.0)
15
+ contracts (>= 0.16.0, < 0.18.0)
16
+ cucumber (>= 8.0, < 10.0)
17
+ rspec-expectations (~> 3.4)
18
+ thor (~> 1.0)
19
+ ast (2.4.2)
20
+ bigdecimal (3.1.8)
21
+ builder (3.2.4)
22
+ byebug (11.1.3)
23
+ coderay (1.1.3)
24
+ contracts (0.17)
25
+ cucumber (9.2.0)
26
+ builder (~> 3.2)
27
+ cucumber-ci-environment (> 9, < 11)
28
+ cucumber-core (> 13, < 14)
29
+ cucumber-cucumber-expressions (~> 17.0)
30
+ cucumber-gherkin (> 24, < 28)
31
+ cucumber-html-formatter (> 20.3, < 22)
32
+ cucumber-messages (> 19, < 25)
33
+ diff-lcs (~> 1.5)
34
+ mini_mime (~> 1.1)
35
+ multi_test (~> 1.1)
36
+ sys-uname (~> 1.2)
37
+ cucumber-ci-environment (10.0.1)
38
+ cucumber-core (13.0.2)
39
+ cucumber-gherkin (>= 27, < 28)
40
+ cucumber-messages (>= 20, < 23)
41
+ cucumber-tag-expressions (> 5, < 7)
42
+ cucumber-cucumber-expressions (17.1.0)
43
+ bigdecimal
44
+ cucumber-gherkin (27.0.0)
45
+ cucumber-messages (>= 19.1.4, < 23)
46
+ cucumber-html-formatter (21.3.1)
47
+ cucumber-messages (> 19, < 25)
48
+ cucumber-messages (22.0.0)
49
+ cucumber-tag-expressions (6.1.0)
50
+ diff-lcs (1.5.1)
51
+ ffi (1.16.3)
52
+ formatador (1.1.0)
53
+ guard (2.18.1)
49
54
  formatador (>= 0.2.4)
50
55
  listen (>= 2.7, < 4.0)
51
56
  lumberjack (>= 1.0.12, < 2.0)
52
57
  nenv (~> 0.1)
53
58
  notiffany (~> 0.0)
54
- pry (>= 0.9.12)
59
+ pry (>= 0.13.0)
55
60
  shellany (~> 0.0)
56
61
  thor (>= 0.18.1)
57
- guard-bundler (2.2.1)
58
- bundler (>= 1.3.0, < 3)
62
+ guard-bundler (3.0.1)
63
+ bundler (>= 2.1, < 3)
59
64
  guard (~> 2.2)
60
65
  guard-compat (~> 1.1)
61
66
  guard-compat (1.2.1)
@@ -63,70 +68,108 @@ GEM
63
68
  guard (~> 2.1)
64
69
  guard-compat (~> 1.1)
65
70
  rspec (>= 2.99.0, < 4.0)
66
- jaro_winkler (1.5.2)
67
- listen (3.1.5)
68
- rb-fsevent (~> 0.9, >= 0.9.4)
69
- rb-inotify (~> 0.9, >= 0.9.7)
70
- ruby_dep (~> 1.2)
71
- lumberjack (1.0.13)
72
- method_source (0.9.2)
73
- mini_portile2 (2.4.0)
74
- multi_json (1.13.1)
75
- multi_test (0.1.2)
71
+ http-2-next (1.0.3)
72
+ httpx (1.2.5)
73
+ http-2-next (>= 1.0.3)
74
+ json (2.7.2)
75
+ language_server-protocol (3.17.0.3)
76
+ listen (3.9.0)
77
+ rb-fsevent (~> 0.10, >= 0.10.3)
78
+ rb-inotify (~> 0.9, >= 0.9.10)
79
+ lumberjack (1.2.10)
80
+ method_source (1.1.0)
81
+ mini_mime (1.1.5)
82
+ multi_test (1.1.0)
76
83
  nenv (0.3.0)
77
- nokogiri (1.10.2)
78
- mini_portile2 (~> 2.4.0)
79
- notiffany (0.1.1)
84
+ nokogiri (1.16.5-arm64-darwin)
85
+ racc (~> 1.4)
86
+ nokogiri (1.16.5-x86_64-darwin)
87
+ racc (~> 1.4)
88
+ nokogiri (1.16.5-x86_64-linux)
89
+ racc (~> 1.4)
90
+ notiffany (0.1.3)
80
91
  nenv (~> 0.1)
81
92
  shellany (~> 0.0)
82
- parallel (1.17.0)
83
- parser (2.6.2.1)
84
- ast (~> 2.4.0)
85
- pry (0.12.2)
86
- coderay (~> 1.1.0)
87
- method_source (~> 0.9.0)
88
- pry-byebug (3.7.0)
93
+ parallel (1.24.0)
94
+ parser (3.3.1.0)
95
+ ast (~> 2.4.1)
96
+ racc
97
+ pry (0.14.2)
98
+ coderay (~> 1.1)
99
+ method_source (~> 1.0)
100
+ pry-byebug (3.10.1)
89
101
  byebug (~> 11.0)
90
- pry (~> 0.10)
91
- psych (3.1.0)
92
- public_suffix (3.0.3)
93
- rack (2.0.7)
94
- rainbow (3.0.0)
95
- rake (12.3.2)
96
- rb-fsevent (0.10.3)
97
- rb-inotify (0.10.0)
102
+ pry (>= 0.13, < 0.15)
103
+ public_suffix (5.0.5)
104
+ racc (1.8.0)
105
+ rack (3.0.11)
106
+ rackup (0.2.3)
107
+ rack (>= 3.0.0.beta1)
108
+ webrick
109
+ rainbow (3.1.1)
110
+ rake (13.2.1)
111
+ rb-fsevent (0.11.2)
112
+ rb-inotify (0.11.1)
98
113
  ffi (~> 1.0)
99
- rspec (3.8.0)
100
- rspec-core (~> 3.8.0)
101
- rspec-expectations (~> 3.8.0)
102
- rspec-mocks (~> 3.8.0)
103
- rspec-core (3.8.0)
104
- rspec-support (~> 3.8.0)
105
- rspec-expectations (3.8.2)
114
+ regexp_parser (2.9.2)
115
+ rexml (3.2.8)
116
+ strscan (>= 3.0.9)
117
+ rspec (3.13.0)
118
+ rspec-core (~> 3.13.0)
119
+ rspec-expectations (~> 3.13.0)
120
+ rspec-mocks (~> 3.13.0)
121
+ rspec-core (3.13.0)
122
+ rspec-support (~> 3.13.0)
123
+ rspec-expectations (3.13.0)
106
124
  diff-lcs (>= 1.2.0, < 2.0)
107
- rspec-support (~> 3.8.0)
108
- rspec-mocks (3.8.0)
125
+ rspec-support (~> 3.13.0)
126
+ rspec-mocks (3.13.1)
109
127
  diff-lcs (>= 1.2.0, < 2.0)
110
- rspec-support (~> 3.8.0)
111
- rspec-support (3.8.0)
112
- rubocop (0.67.2)
113
- jaro_winkler (~> 1.5.1)
128
+ rspec-support (~> 3.13.0)
129
+ rspec-support (3.13.1)
130
+ rubocop (1.64.0)
131
+ json (~> 2.3)
132
+ language_server-protocol (>= 3.17.0)
114
133
  parallel (~> 1.10)
115
- parser (>= 2.5, != 2.5.1.1)
116
- psych (>= 3.1.0)
134
+ parser (>= 3.3.0.2)
117
135
  rainbow (>= 2.2.2, < 4.0)
136
+ regexp_parser (>= 1.8, < 3.0)
137
+ rexml (>= 3.2.5, < 4.0)
138
+ rubocop-ast (>= 1.31.1, < 2.0)
118
139
  ruby-progressbar (~> 1.7)
119
- unicode-display_width (>= 1.4.0, < 1.6)
120
- ruby-progressbar (1.10.0)
121
- ruby_dep (1.5.0)
140
+ unicode-display_width (>= 2.4.0, < 3.0)
141
+ rubocop-ast (1.31.3)
142
+ parser (>= 3.3.1.0)
143
+ rubocop-capybara (2.20.0)
144
+ rubocop (~> 1.41)
145
+ rubocop-factory_bot (2.25.1)
146
+ rubocop (~> 1.41)
147
+ rubocop-rake (0.6.0)
148
+ rubocop (~> 1.0)
149
+ rubocop-rspec (2.29.2)
150
+ rubocop (~> 1.40)
151
+ rubocop-capybara (~> 2.17)
152
+ rubocop-factory_bot (~> 2.22)
153
+ rubocop-rspec_rails (~> 2.28)
154
+ rubocop-rspec_rails (2.28.3)
155
+ rubocop (~> 1.40)
156
+ ruby-progressbar (1.13.0)
122
157
  shellany (0.0.1)
123
- stub_server (0.4.0)
124
- rack
125
- thor (0.20.3)
126
- unicode-display_width (1.5.0)
158
+ strscan (3.1.0)
159
+ stub_server (0.7.0)
160
+ rackup (~> 0.2.2)
161
+ webrick
162
+ sys-uname (1.2.3)
163
+ ffi (~> 1.1)
164
+ thor (1.3.1)
165
+ unicode-display_width (2.5.0)
166
+ webrick (1.8.1)
127
167
 
128
168
  PLATFORMS
129
- ruby
169
+ arm64-darwin-22
170
+ arm64-darwin-23
171
+ x86_64-darwin-21
172
+ x86_64-linux
130
173
 
131
174
  DEPENDENCIES
132
175
  aruba
@@ -135,12 +178,15 @@ DEPENDENCIES
135
178
  guard-bundler
136
179
  guard-rspec
137
180
  httpspell!
181
+ httpx
138
182
  pry
139
183
  pry-byebug
140
184
  rake
141
185
  rspec
142
186
  rubocop
187
+ rubocop-rake
188
+ rubocop-rspec
143
189
  stub_server
144
190
 
145
191
  BUNDLED WITH
146
- 1.17.2
192
+ 2.5.9
data/Guardfile ADDED
@@ -0,0 +1,25 @@
1
+ guard :bundler do
2
+ require 'guard/bundler'
3
+ require 'guard/bundler/verify'
4
+ helper = Guard::Bundler::Verify.new
5
+
6
+ files = ['Gemfile']
7
+ files += Dir['*.gemspec'] if files.any? { |f| helper.uses_gemspec?(f) }
8
+
9
+ # Assume files are symlinked from somewhere
10
+ files.each { |file| watch(helper.real_path(file)) }
11
+ end
12
+
13
+ guard :rspec, cmd: "bundle exec rspec" do
14
+ require "guard/rspec/dsl"
15
+ dsl = Guard::RSpec::Dsl.new(self)
16
+
17
+ rspec = dsl.rspec
18
+ watch(rspec.spec_helper) { rspec.spec_dir }
19
+ watch(rspec.spec_support) { rspec.spec_dir }
20
+ watch(rspec.spec_files)
21
+
22
+ # Ruby files
23
+ ruby = dsl.ruby
24
+ dsl.watch_spec_files_for(ruby.lib_files)
25
+ end
data/README.markdown CHANGED
@@ -1,7 +1,5 @@
1
1
  # `httpspell`
2
2
 
3
- [![Build Status](https://travis-ci.org/suhlig/httpspell.svg?branch=master)](https://travis-ci.org/suhlig/httpspell)
4
-
5
3
  This is a spellchecker that recursively fetches HTML pages, converts them to plain text (using [pandoc](http://pandoc.org/)), and spellchecks them with [hunspell](https://hunspell.github.io/). Unknown words will be printed to `stdout`, which makes the tool a good candidate for CI pipelines where you might want to take action when a spelling error is found on a web page.
6
4
 
7
5
  Words that are not in the dictionary for the given language (inferred from the `lang` attribute of the HTML document's root element) can be added to a personal dictionary, which will mark the word as correctly spelled.
@@ -46,3 +44,26 @@ If you produce content with kramdown (e.g. using Jekyll), setting `spellcheck='f
46
44
  ```
47
45
  {: spellcheck="false"}
48
46
  ```
47
+
48
+ # Dictionaries
49
+
50
+ Hunspell uses the system dictionary paths; on the Mac this is `~/Library/Spelling/`. Get some dictionaries as explained in the [hunspell](https://github.com/hunspell/hunspell) project:
51
+
52
+ ```command
53
+ $ wget -O ~/Library/Spelling/en_US.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/en/en_US.aff
54
+ $ wget -O ~/Library/Spelling/en_US.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/en/en_US.dic
55
+ ```
56
+
57
+ German:
58
+
59
+ ```command
60
+ $ wget -O ~/Library/Spelling/de_DE.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/de/de_DE_frami.dic
61
+ $ wget -O ~/Library/Spelling/de_DE.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/de/de_DE_frami.aff
62
+ ```
63
+
64
+ Italian (for integration tests):
65
+
66
+ ```command
67
+ $ wget -O ~/Library/Spelling/it_IT.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/it_IT/it_IT.dic
68
+ $ wget -O ~/Library/Spelling/it_IT.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/it_IT/it_IT.aff
69
+ ```
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ task default: ['spec:all']
10
10
 
11
11
  namespace :spec do
12
12
  desc 'Run all specs'
13
- task all: %i[rubocop:auto_correct unit system]
13
+ task all: %i[rubocop:autocorrect unit system]
14
14
 
15
15
  %w[unit system].each do |type|
16
16
  desc "Run #{type} tests"
data/TODO.markdown CHANGED
@@ -1,6 +1,4 @@
1
1
  * Bail out if lang cannot be inferred and is not given on cmdline
2
2
  * exe/httpspell: # TODO: --recursive, defaults to false
3
3
  * exe/httpspell: # TODO wget has some additional options for recursive behavior that should be reviewed
4
- * exe/httpspell: # TODO: Find sections with a lang attribute and handle them separately
5
4
  * lib/httpspell/spider.rb: # TODO Print _which_ entry of the blacklist matches
6
- * lib/httpspell/spider.rb: # TODO Ignore same page links (some anchor)
data/exe/httpspell CHANGED
@@ -2,9 +2,9 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  require 'optparse'
5
- require 'httpspell/spider'
6
- require 'httpspell/spellchecker'
7
- require 'httpspell/version'
5
+ require 'http_spell/spider'
6
+ require 'http_spell/spellchecker'
7
+ require 'http_spell/version'
8
8
 
9
9
  personal_dictionary_path = nil
10
10
  force_language = nil
@@ -59,27 +59,38 @@ if ARGV.size != 1
59
59
  exit 1
60
60
  end
61
61
 
62
- spell_checker = HttpSpell::SpellChecker.new(personal_dictionary_path, tracing: tracing)
62
+ def check(doc, lang, personal_dictionary_path, verbose)
63
+ unknown_words = HttpSpell::SpellChecker.new(personal_dictionary_path, verbose:).check(doc, lang)
64
+
65
+ if unknown_words.empty?
66
+ warn 'No unknown words.' if verbose
67
+ else
68
+ warn "#{unknown_words.size} unknown words:" if verbose
69
+ puts unknown_words
70
+ true
71
+ end
72
+ end
73
+
63
74
  has_unknown_words = false
64
75
 
65
- spider_success = HttpSpell::Spider.new(ARGV.first, whitelist: whitelist, blacklist: blacklist, tracing: tracing).start do |url, doc|
66
- lang = force_language || doc.root['lang'] || ENV['LANGUAGE']
76
+ spider_success = HttpSpell::Spider.new(ARGV.first, whitelist:, blacklist:, verbose:, tracing:).start do |url, doc|
77
+ lang = force_language || doc.root['lang'] || ENV.fetch('LANGUAGE', nil)
78
+ warn "Checking #{url} as #{lang}" if verbose
67
79
 
68
- # Remove sections that are not to be spellchecked
80
+ # Remove elements that are not to be spellchecked
69
81
  doc.css('pre').each(&:unlink)
70
82
  doc.css('code').each(&:unlink)
71
83
  doc.css('[spellcheck=false]').each(&:unlink)
72
84
 
73
- # TODO: Find sections with a lang attribute and handle them separately
74
- unknown_words = spell_checker.check(doc.to_s, lang)
75
-
76
- if unknown_words.empty?
77
- warn "No unknown words (language is #{lang}) at #{url}." if verbose
78
- else
79
- warn "#{unknown_words.size} unknown words (language is #{lang}) at #{url}:" if verbose
80
- puts unknown_words
81
- has_unknown_words = true
85
+ # Handle elements with a different lang attribute separately
86
+ doc.css(%([lang]:not([lang="#{lang}"]))).each do |element|
87
+ warn "Handling #{element.name} with lang #{element['lang']}:" if verbose
88
+ has_unknown_words |= check(element.to_s, element['lang'], personal_dictionary_path, verbose)
89
+ element.unlink
82
90
  end
91
+
92
+ # Everything else
93
+ has_unknown_words |= check(doc.to_s, lang, personal_dictionary_path, verbose)
83
94
  end
84
95
 
85
96
  exit 2 unless spider_success
data/httpspell.gemspec CHANGED
@@ -2,9 +2,8 @@
2
2
 
3
3
  lib = File.expand_path('lib', __dir__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
- require 'httpspell/version'
5
+ require 'http_spell/version'
6
6
 
7
- # rubocop:disable Metrics/BlockLength
8
7
  Gem::Specification.new do |spec|
9
8
  spec.name = 'httpspell'
10
9
  spec.version = HttpSpell::VERSION
@@ -13,8 +12,8 @@ Gem::Specification.new do |spec|
13
12
 
14
13
  spec.summary = 'HTTP spellchecker'
15
14
  spec.description = %(httpspell is a spellchecker that recursively fetches
16
- HTML pages, converts them to plain text using pandoc, and
17
- spellchecks them with hunspell.)
15
+ HTML pages, converts them to plain text using pandoc, and
16
+ spellchecks them with hunspell.)
18
17
  spec.license = 'MIT'
19
18
 
20
19
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
@@ -26,17 +25,5 @@ Gem::Specification.new do |spec|
26
25
 
27
26
  spec.add_dependency 'addressable'
28
27
  spec.add_dependency 'nokogiri'
29
-
30
- spec.add_development_dependency 'aruba'
31
- spec.add_development_dependency 'bundler'
32
- spec.add_development_dependency 'guard'
33
- spec.add_development_dependency 'guard-bundler'
34
- spec.add_development_dependency 'guard-rspec'
35
- spec.add_development_dependency 'pry'
36
- spec.add_development_dependency 'pry-byebug'
37
- spec.add_development_dependency 'rake'
38
- spec.add_development_dependency 'rspec'
39
- spec.add_development_dependency 'rubocop'
40
- spec.add_development_dependency 'stub_server'
28
+ spec.metadata['rubygems_mfa_required'] = 'true'
41
29
  end
42
- # rubocop:enable Metrics/BlockLength
data/lib/{httpspell → http_spell}/spellchecker.rb CHANGED
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module HttpSpell
2
4
  class SpellChecker
3
- def initialize(personal_dictionary_path = nil, tracing: false)
5
+ def initialize(personal_dictionary_path = nil, verbose: false)
4
6
  @personal_dictionary_arg = "-p #{personal_dictionary_path}" if personal_dictionary_path
5
- @tracing = tracing
7
+ @verbose = verbose
6
8
  end
7
9
 
8
10
  def check(doc, lang)
@@ -11,8 +13,8 @@ module HttpSpell
11
13
  "hunspell -d #{translate(lang)} #{@personal_dictionary_arg} -i UTF-8 -l",
12
14
  ]
13
15
 
14
- if @tracing
15
- warn "Piping the HTML document into the following chain of commands:"
16
+ if @verbose
17
+ warn 'Piping the HTML document into the following chain of commands:'
16
18
  warn commands
17
19
  end
18
20
 
data/lib/{httpspell → http_spell}/spider.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'nokogiri'
2
4
  require 'open-uri'
3
5
  require 'open3'
@@ -8,12 +10,13 @@ module HttpSpell
8
10
  class Spider
9
11
  attr_reader :todo, :done
10
12
 
11
- def initialize(starting_point, whitelist: nil, blacklist: [], tracing: false)
13
+ def initialize(starting_point, whitelist: nil, blacklist: [], verbose: false, tracing: false)
12
14
  @todo = []
13
15
  @done = []
14
16
  todo << Addressable::URI.parse(starting_point)
15
17
  @whitelist = whitelist || [/^#{starting_point}/]
16
18
  @blacklist = blacklist
19
+ @verbose = verbose
17
20
  @tracing = tracing
18
21
  end
19
22
 
@@ -26,13 +29,13 @@ module HttpSpell
26
29
  begin
27
30
  extracted = links(url) do |u, d|
28
31
  yield u, d if block_given?
29
- rescue
32
+ rescue StandardError
30
33
  warn "Callback error for #{url}: #{$ERROR_INFO}"
31
34
  warn $ERROR_INFO.backtrace if @tracing
32
35
  end
33
36
 
34
37
  done.append(url)
35
- todo.concat(extracted - done - todo)
38
+ todo.concat(extracted - done - todo).uniq!
36
39
  rescue StandardError
37
40
  warn "Skipping #{url} because of #{$ERROR_INFO.message}"
38
41
  warn $ERROR_INFO.backtrace if @tracing
@@ -40,16 +43,16 @@ module HttpSpell
40
43
  end
41
44
  end
42
45
 
43
- return success
46
+ success
44
47
  end
45
48
 
46
49
  private
47
50
 
48
51
  def links(uri)
49
- response = http_get(URI(uri))
52
+ response = http_get(uri)
50
53
 
51
- if response.content_type != 'text/html'
52
- warn "Skipping #{uri} because it is not HTML" if @tracing
54
+ if response.respond_to?(:content_type) && response.content_type != 'text/html'
55
+ warn "Skipping #{uri} because it is not HTML" if @verbose
53
56
  return []
54
57
  end
55
58
 
@@ -60,17 +63,19 @@ module HttpSpell
60
63
  link = uri.join(link) if link.relative?
61
64
 
62
65
  if @whitelist.none? { |re| re.match?(link.to_s) }
63
- warn "Skipping #{link} because it is not on the whitelist #{@whitelist}" if @tracing
66
+ warn "Skipping #{link} because it is not on the whitelist #{@whitelist}" if @verbose
64
67
  next
65
68
  end
66
69
 
67
70
  if @blacklist.any? { |re| re.match?(link.to_s) }
68
- # TODO Print _which_ entry of the blacklist matches
69
- warn "Skipping #{link} because it is on the blacklist #{@blacklist}" if @tracing
71
+ # TODO: Print _which_ entry of the blacklist matches
72
+ warn "Skipping #{link} because it is on the blacklist #{@blacklist}" if @verbose
70
73
  next
71
74
  end
72
75
 
73
- # TODO Ignore same page links (some anchor)
76
+ # Ignore fragment; we always check the whole page
77
+ link.fragment = nil
78
+
74
79
  link
75
80
  rescue StandardError
76
81
  warn $ERROR_INFO.message
@@ -79,7 +84,7 @@ module HttpSpell
79
84
 
80
85
  yield uri, doc if block_given?
81
86
 
82
- warn "Adding #{links.size} links from #{uri}" if @tracing
87
+ warn "Adding #{links.size} links from #{uri}" if @verbose
83
88
  links
84
89
  end
85
90
 
@@ -88,10 +93,10 @@ module HttpSpell
88
93
  tries = 10
89
94
 
90
95
  begin
91
- uri.open(redirect: false)
92
- rescue OpenURI::HTTPRedirect => redirect
93
- uri = redirect.uri
94
- retry if (tries -= 1) > 0
96
+ URI.parse(uri).open(redirect: false)
97
+ rescue OpenURI::HTTPRedirect => e
98
+ uri = e.uri
99
+ retry if (tries -= 1).positive?
95
100
  raise
96
101
  end
97
102
  end
data/lib/{httpspell → http_spell}/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HttpSpell
4
- VERSION = '1.3.0'
4
+ VERSION = '1.4.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: httpspell
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steffen Uhlig
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-12 00:00:00.000000000 Z
11
+ date: 2024-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -38,164 +38,10 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: aruba
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: bundler
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
69
- - !ruby/object:Gem::Dependency
70
- name: guard
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: guard-bundler
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: guard-rspec
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: pry
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - ">="
116
- - !ruby/object:Gem::Version
117
- version: '0'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - ">="
123
- - !ruby/object:Gem::Version
124
- version: '0'
125
- - !ruby/object:Gem::Dependency
126
- name: pry-byebug
127
- requirement: !ruby/object:Gem::Requirement
128
- requirements:
129
- - - ">="
130
- - !ruby/object:Gem::Version
131
- version: '0'
132
- type: :development
133
- prerelease: false
134
- version_requirements: !ruby/object:Gem::Requirement
135
- requirements:
136
- - - ">="
137
- - !ruby/object:Gem::Version
138
- version: '0'
139
- - !ruby/object:Gem::Dependency
140
- name: rake
141
- requirement: !ruby/object:Gem::Requirement
142
- requirements:
143
- - - ">="
144
- - !ruby/object:Gem::Version
145
- version: '0'
146
- type: :development
147
- prerelease: false
148
- version_requirements: !ruby/object:Gem::Requirement
149
- requirements:
150
- - - ">="
151
- - !ruby/object:Gem::Version
152
- version: '0'
153
- - !ruby/object:Gem::Dependency
154
- name: rspec
155
- requirement: !ruby/object:Gem::Requirement
156
- requirements:
157
- - - ">="
158
- - !ruby/object:Gem::Version
159
- version: '0'
160
- type: :development
161
- prerelease: false
162
- version_requirements: !ruby/object:Gem::Requirement
163
- requirements:
164
- - - ">="
165
- - !ruby/object:Gem::Version
166
- version: '0'
167
- - !ruby/object:Gem::Dependency
168
- name: rubocop
169
- requirement: !ruby/object:Gem::Requirement
170
- requirements:
171
- - - ">="
172
- - !ruby/object:Gem::Version
173
- version: '0'
174
- type: :development
175
- prerelease: false
176
- version_requirements: !ruby/object:Gem::Requirement
177
- requirements:
178
- - - ">="
179
- - !ruby/object:Gem::Version
180
- version: '0'
181
- - !ruby/object:Gem::Dependency
182
- name: stub_server
183
- requirement: !ruby/object:Gem::Requirement
184
- requirements:
185
- - - ">="
186
- - !ruby/object:Gem::Version
187
- version: '0'
188
- type: :development
189
- prerelease: false
190
- version_requirements: !ruby/object:Gem::Requirement
191
- requirements:
192
- - - ">="
193
- - !ruby/object:Gem::Version
194
- version: '0'
195
41
  description: |-
196
42
  httpspell is a spellchecker that recursively fetches
197
- HTML pages, converts them to plain text using pandoc, and
198
- spellchecks them with hunspell.
43
+ HTML pages, converts them to plain text using pandoc, and
44
+ spellchecks them with hunspell.
199
45
  email:
200
46
  - steffen@familie-uhlig.net
201
47
  executables:
@@ -203,25 +49,29 @@ executables:
203
49
  extensions: []
204
50
  extra_rdoc_files: []
205
51
  files:
52
+ - ".github/dependabot.yml"
206
53
  - ".gitignore"
54
+ - ".mergify.yml"
207
55
  - ".rspec"
208
56
  - ".rubocop.yml"
209
- - ".travis.yml"
57
+ - ".ruby-version"
210
58
  - Gemfile
211
59
  - Gemfile.lock
60
+ - Guardfile
212
61
  - README.markdown
213
62
  - Rakefile
214
63
  - TODO.markdown
215
64
  - exe/httpspell
216
65
  - httpspell.gemspec
217
- - lib/httpspell/spellchecker.rb
218
- - lib/httpspell/spider.rb
219
- - lib/httpspell/version.rb
220
- homepage:
66
+ - lib/http_spell/spellchecker.rb
67
+ - lib/http_spell/spider.rb
68
+ - lib/http_spell/version.rb
69
+ homepage:
221
70
  licenses:
222
71
  - MIT
223
- metadata: {}
224
- post_install_message:
72
+ metadata:
73
+ rubygems_mfa_required: 'true'
74
+ post_install_message:
225
75
  rdoc_options: []
226
76
  require_paths:
227
77
  - lib
@@ -236,8 +86,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
236
86
  - !ruby/object:Gem::Version
237
87
  version: '0'
238
88
  requirements: []
239
- rubygems_version: 3.0.1
240
- signing_key:
89
+ rubygems_version: 3.5.9
90
+ signing_key:
241
91
  specification_version: 4
242
92
  summary: HTTP spellchecker
243
93
  test_files: []
data/.travis.yml DELETED
@@ -1,6 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - 2.5.1
4
- before_install:
5
- - sudo apt-get -qq update
6
- - sudo apt-get install -y pandoc hunspell hunspell-en-us