httpspell 1.4.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -0
- data/Gemfile.lock +1 -1
- data/TODO.markdown +0 -1
- data/exe/httpspell +3 -3
- data/httpspell.gemspec +1 -1
- data/lib/{httpspell → http_spell}/spellchecker.rb +3 -1
- data/lib/{httpspell → http_spell}/spider.rb +13 -9
- data/lib/{httpspell → http_spell}/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc09324c003c7b14e08fa255b7a31c0a9aeb143df033da9aea300619a47268ba
|
4
|
+
data.tar.gz: 6890352a3cef38e243e2506398d58736c8179c2e0443a2b6ff341165e724dba0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 826bb8e875b2f1584dd5c052ab9777e616e1da0d6844263589b027c3eabfb07955155e0c43b8b1b8dc253d720eba952e80330c38035fff53fc1943420dea7454
|
7
|
+
data.tar.gz: 7a4e3c9aaa586d4fbdc41971424cd5f064793ff18cba8d8606a452b3cee36070af44aa2f78ab307c71a613404cc1e490af1f56eca11068675183625f5360790e
|
data/.rubocop.yml
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
+
require:
|
2
|
+
- rubocop-rake
|
3
|
+
- rubocop-rspec
|
1
4
|
AllCops:
|
2
5
|
NewCops: enable
|
3
6
|
TargetRubyVersion: 3.3
|
4
7
|
Include:
|
8
|
+
- '**/*.rb'
|
5
9
|
- '**/Gemfile'
|
6
10
|
- '**/Rakefile'
|
7
11
|
- '**/config.ru'
|
@@ -23,3 +27,19 @@ Layout/LineLength:
|
|
23
27
|
Max: 160
|
24
28
|
Style/Documentation:
|
25
29
|
Enabled: false
|
30
|
+
Metrics/AbcSize:
|
31
|
+
Enabled: false
|
32
|
+
Metrics/MethodLength:
|
33
|
+
Enabled: false
|
34
|
+
Metrics/CyclomaticComplexity:
|
35
|
+
Enabled: false
|
36
|
+
Style/TrailingCommaInArrayLiteral:
|
37
|
+
Enabled: false
|
38
|
+
RSpec/ExampleWording:
|
39
|
+
Enabled: false
|
40
|
+
RSpec/InstanceVariable:
|
41
|
+
AssignmentOnly: true
|
42
|
+
RSpec/ExampleLength:
|
43
|
+
Max: 10
|
44
|
+
Metrics/PerceivedComplexity:
|
45
|
+
Max: 16
|
data/Gemfile.lock
CHANGED
data/TODO.markdown
CHANGED
@@ -2,4 +2,3 @@
|
|
2
2
|
* exe/httpspell: # TODO: --recursive, defaults to false
|
3
3
|
* exe/httpspell: # TODO wget has some additional options for recursive behavior that should be reviewed
|
4
4
|
* lib/httpspell/spider.rb: # TODO Print _which_ entry of the blacklist matches
|
5
|
-
* lib/httpspell/spider.rb: # TODO Ignore same page links (some anchor)
|
data/exe/httpspell
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
4
|
require 'optparse'
|
5
|
-
require 'httpspell/spider'
|
6
|
-
require 'httpspell/spellchecker'
|
7
|
-
require 'httpspell/version'
|
5
|
+
require 'http_spell/spider'
|
6
|
+
require 'http_spell/spellchecker'
|
7
|
+
require 'http_spell/version'
|
8
8
|
|
9
9
|
personal_dictionary_path = nil
|
10
10
|
force_language = nil
|
data/httpspell.gemspec
CHANGED
data/lib/{httpspell → http_spell}/spellchecker.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module HttpSpell
|
2
4
|
class SpellChecker
|
3
5
|
def initialize(personal_dictionary_path = nil, verbose: false)
|
@@ -12,7 +14,7 @@ module HttpSpell
|
|
12
14
|
]
|
13
15
|
|
14
16
|
if @verbose
|
15
|
-
warn
|
17
|
+
warn 'Piping the HTML document into the following chain of commands:'
|
16
18
|
warn commands
|
17
19
|
end
|
18
20
|
|
data/lib/{httpspell → http_spell}/spider.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'nokogiri'
|
2
4
|
require 'open-uri'
|
3
5
|
require 'open3'
|
@@ -27,13 +29,13 @@ module HttpSpell
|
|
27
29
|
begin
|
28
30
|
extracted = links(url) do |u, d|
|
29
31
|
yield u, d if block_given?
|
30
|
-
rescue
|
32
|
+
rescue StandardError
|
31
33
|
warn "Callback error for #{url}: #{$ERROR_INFO}"
|
32
34
|
warn $ERROR_INFO.backtrace if @tracing
|
33
35
|
end
|
34
36
|
|
35
37
|
done.append(url)
|
36
|
-
todo.concat(extracted - done - todo)
|
38
|
+
todo.concat(extracted - done - todo).uniq!
|
37
39
|
rescue StandardError
|
38
40
|
warn "Skipping #{url} because of #{$ERROR_INFO.message}"
|
39
41
|
warn $ERROR_INFO.backtrace if @tracing
|
@@ -41,7 +43,7 @@ module HttpSpell
|
|
41
43
|
end
|
42
44
|
end
|
43
45
|
|
44
|
-
|
46
|
+
success
|
45
47
|
end
|
46
48
|
|
47
49
|
private
|
@@ -66,12 +68,14 @@ module HttpSpell
|
|
66
68
|
end
|
67
69
|
|
68
70
|
if @blacklist.any? { |re| re.match?(link.to_s) }
|
69
|
-
# TODO Print _which_ entry of the blacklist matches
|
71
|
+
# TODO: Print _which_ entry of the blacklist matches
|
70
72
|
warn "Skipping #{link} because it is on the blacklist #{@blacklist}" if @verbose
|
71
73
|
next
|
72
74
|
end
|
73
75
|
|
74
|
-
#
|
76
|
+
# Ignore fragment; we always check the whole page
|
77
|
+
link.fragment = nil
|
78
|
+
|
75
79
|
link
|
76
80
|
rescue StandardError
|
77
81
|
warn $ERROR_INFO.message
|
@@ -89,10 +93,10 @@ module HttpSpell
|
|
89
93
|
tries = 10
|
90
94
|
|
91
95
|
begin
|
92
|
-
URI.open(
|
93
|
-
rescue OpenURI::HTTPRedirect =>
|
94
|
-
uri =
|
95
|
-
retry if (tries -= 1)
|
96
|
+
URI.parse(uri).open(redirect: false)
|
97
|
+
rescue OpenURI::HTTPRedirect => e
|
98
|
+
uri = e.uri
|
99
|
+
retry if (tries -= 1).positive?
|
96
100
|
raise
|
97
101
|
end
|
98
102
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: httpspell
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steffen Uhlig
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-05-
|
11
|
+
date: 2024-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -63,9 +63,9 @@ files:
|
|
63
63
|
- TODO.markdown
|
64
64
|
- exe/httpspell
|
65
65
|
- httpspell.gemspec
|
66
|
-
- lib/httpspell/spellchecker.rb
|
67
|
-
- lib/httpspell/spider.rb
|
68
|
-
- lib/httpspell/version.rb
|
66
|
+
- lib/http_spell/spellchecker.rb
|
67
|
+
- lib/http_spell/spider.rb
|
68
|
+
- lib/http_spell/version.rb
|
69
69
|
homepage:
|
70
70
|
licenses:
|
71
71
|
- MIT
|