httpspell 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -0
- data/Gemfile.lock +1 -1
- data/TODO.markdown +0 -1
- data/exe/httpspell +3 -3
- data/httpspell.gemspec +1 -1
- data/lib/{httpspell → http_spell}/spellchecker.rb +3 -1
- data/lib/{httpspell → http_spell}/spider.rb +13 -9
- data/lib/{httpspell → http_spell}/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc09324c003c7b14e08fa255b7a31c0a9aeb143df033da9aea300619a47268ba
|
4
|
+
data.tar.gz: 6890352a3cef38e243e2506398d58736c8179c2e0443a2b6ff341165e724dba0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 826bb8e875b2f1584dd5c052ab9777e616e1da0d6844263589b027c3eabfb07955155e0c43b8b1b8dc253d720eba952e80330c38035fff53fc1943420dea7454
|
7
|
+
data.tar.gz: 7a4e3c9aaa586d4fbdc41971424cd5f064793ff18cba8d8606a452b3cee36070af44aa2f78ab307c71a613404cc1e490af1f56eca11068675183625f5360790e
|
data/.rubocop.yml
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
+
require:
|
2
|
+
- rubocop-rake
|
3
|
+
- rubocop-rspec
|
1
4
|
AllCops:
|
2
5
|
NewCops: enable
|
3
6
|
TargetRubyVersion: 3.3
|
4
7
|
Include:
|
8
|
+
- '**/*.rb'
|
5
9
|
- '**/Gemfile'
|
6
10
|
- '**/Rakefile'
|
7
11
|
- '**/config.ru'
|
@@ -23,3 +27,19 @@ Layout/LineLength:
|
|
23
27
|
Max: 160
|
24
28
|
Style/Documentation:
|
25
29
|
Enabled: false
|
30
|
+
Metrics/AbcSize:
|
31
|
+
Enabled: false
|
32
|
+
Metrics/MethodLength:
|
33
|
+
Enabled: false
|
34
|
+
Metrics/CyclomaticComplexity:
|
35
|
+
Enabled: false
|
36
|
+
Style/TrailingCommaInArrayLiteral:
|
37
|
+
Enabled: false
|
38
|
+
RSpec/ExampleWording:
|
39
|
+
Enabled: false
|
40
|
+
RSpec/InstanceVariable:
|
41
|
+
AssignmentOnly: true
|
42
|
+
RSpec/ExampleLength:
|
43
|
+
Max: 10
|
44
|
+
Metrics/PerceivedComplexity:
|
45
|
+
Max: 16
|
data/Gemfile.lock
CHANGED
data/TODO.markdown
CHANGED
@@ -2,4 +2,3 @@
|
|
2
2
|
* exe/httpspell: # TODO: --recursive, defaults to false
|
3
3
|
* exe/httpspell: # TODO wget has some additional options for recursive behavior that should be reviewed
|
4
4
|
* lib/httpspell/spider.rb: # TODO Print _which_ entry of the blacklist matches
|
5
|
-
* lib/httpspell/spider.rb: # TODO Ignore same page links (some anchor)
|
data/exe/httpspell
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
4
|
require 'optparse'
|
5
|
-
require 'httpspell/spider'
|
6
|
-
require 'httpspell/spellchecker'
|
7
|
-
require 'httpspell/version'
|
5
|
+
require 'http_spell/spider'
|
6
|
+
require 'http_spell/spellchecker'
|
7
|
+
require 'http_spell/version'
|
8
8
|
|
9
9
|
personal_dictionary_path = nil
|
10
10
|
force_language = nil
|
data/httpspell.gemspec
CHANGED
data/lib/{httpspell → http_spell}/spellchecker.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module HttpSpell
|
2
4
|
class SpellChecker
|
3
5
|
def initialize(personal_dictionary_path = nil, verbose: false)
|
@@ -12,7 +14,7 @@ module HttpSpell
|
|
12
14
|
]
|
13
15
|
|
14
16
|
if @verbose
|
15
|
-
warn
|
17
|
+
warn 'Piping the HTML document into the following chain of commands:'
|
16
18
|
warn commands
|
17
19
|
end
|
18
20
|
|
data/lib/{httpspell → http_spell}/spider.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'nokogiri'
|
2
4
|
require 'open-uri'
|
3
5
|
require 'open3'
|
@@ -27,13 +29,13 @@ module HttpSpell
|
|
27
29
|
begin
|
28
30
|
extracted = links(url) do |u, d|
|
29
31
|
yield u, d if block_given?
|
30
|
-
rescue
|
32
|
+
rescue StandardError
|
31
33
|
warn "Callback error for #{url}: #{$ERROR_INFO}"
|
32
34
|
warn $ERROR_INFO.backtrace if @tracing
|
33
35
|
end
|
34
36
|
|
35
37
|
done.append(url)
|
36
|
-
todo.concat(extracted - done - todo)
|
38
|
+
todo.concat(extracted - done - todo).uniq!
|
37
39
|
rescue StandardError
|
38
40
|
warn "Skipping #{url} because of #{$ERROR_INFO.message}"
|
39
41
|
warn $ERROR_INFO.backtrace if @tracing
|
@@ -41,7 +43,7 @@ module HttpSpell
|
|
41
43
|
end
|
42
44
|
end
|
43
45
|
|
44
|
-
|
46
|
+
success
|
45
47
|
end
|
46
48
|
|
47
49
|
private
|
@@ -66,12 +68,14 @@ module HttpSpell
|
|
66
68
|
end
|
67
69
|
|
68
70
|
if @blacklist.any? { |re| re.match?(link.to_s) }
|
69
|
-
# TODO Print _which_ entry of the blacklist matches
|
71
|
+
# TODO: Print _which_ entry of the blacklist matches
|
70
72
|
warn "Skipping #{link} because it is on the blacklist #{@blacklist}" if @verbose
|
71
73
|
next
|
72
74
|
end
|
73
75
|
|
74
|
-
#
|
76
|
+
# Ignore fragment; we always check the whole page
|
77
|
+
link.fragment = nil
|
78
|
+
|
75
79
|
link
|
76
80
|
rescue StandardError
|
77
81
|
warn $ERROR_INFO.message
|
@@ -89,10 +93,10 @@ module HttpSpell
|
|
89
93
|
tries = 10
|
90
94
|
|
91
95
|
begin
|
92
|
-
URI.open(
|
93
|
-
rescue OpenURI::HTTPRedirect =>
|
94
|
-
uri =
|
95
|
-
retry if (tries -= 1)
|
96
|
+
URI.parse(uri).open(redirect: false)
|
97
|
+
rescue OpenURI::HTTPRedirect => e
|
98
|
+
uri = e.uri
|
99
|
+
retry if (tries -= 1).positive?
|
96
100
|
raise
|
97
101
|
end
|
98
102
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: httpspell
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steffen Uhlig
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-05-
|
11
|
+
date: 2024-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -63,9 +63,9 @@ files:
|
|
63
63
|
- TODO.markdown
|
64
64
|
- exe/httpspell
|
65
65
|
- httpspell.gemspec
|
66
|
-
- lib/httpspell/spellchecker.rb
|
67
|
-
- lib/httpspell/spider.rb
|
68
|
-
- lib/httpspell/version.rb
|
66
|
+
- lib/http_spell/spellchecker.rb
|
67
|
+
- lib/http_spell/spider.rb
|
68
|
+
- lib/http_spell/version.rb
|
69
69
|
homepage:
|
70
70
|
licenses:
|
71
71
|
- MIT
|