httpspell 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +57 -55
- data/TODO.markdown +6 -0
- data/lib/httpspell/spider.rb +14 -1
- data/lib/httpspell/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ec39e7efff90e1b266f19714e6b6ce91a1a714e149be0df491e0e04cf5bef564
|
|
4
|
+
data.tar.gz: 58be9917fc0e13ad4653e6687b19b018d30be56b0c0c9d62da8e39ef169264e6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 045cc45fc3bd73e5cb4694fec14dec5afad13ab8402cdc90173b621f21a8a11c0b02a375404e0dc332c7e6ec29a571ce3e6d5b152283ad6bb9323161f583579d
|
|
7
|
+
data.tar.gz: de4f50971f4d12a29c829b47a14f54d84c005c9ff7d421fcfcd672fdf065eb7db5ad4c27a83933e4d9de4bf4417d3cfbdf881a312416a0c6b0a95018637592f7
|
data/Gemfile.lock
CHANGED
|
@@ -1,51 +1,51 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
httpspell (1.2.1)
|
|
4
|
+
httpspell (1.3.0)
|
|
5
5
|
addressable
|
|
6
6
|
nokogiri
|
|
7
7
|
|
|
8
8
|
GEM
|
|
9
9
|
remote: https://rubygems.org/
|
|
10
10
|
specs:
|
|
11
|
-
addressable (2.
|
|
11
|
+
addressable (2.6.0)
|
|
12
12
|
public_suffix (>= 2.0.2, < 4.0)
|
|
13
|
-
aruba (0.14.
|
|
14
|
-
childprocess (>= 0.6.3, <
|
|
13
|
+
aruba (0.14.9)
|
|
14
|
+
childprocess (>= 0.6.3, < 1.1.0)
|
|
15
15
|
contracts (~> 0.9)
|
|
16
16
|
cucumber (>= 1.3.19)
|
|
17
|
-
ffi (~> 1.9
|
|
17
|
+
ffi (~> 1.9)
|
|
18
18
|
rspec-expectations (>= 2.99)
|
|
19
19
|
thor (~> 0.19)
|
|
20
20
|
ast (2.4.0)
|
|
21
|
-
backports (3.
|
|
21
|
+
backports (3.13.0)
|
|
22
22
|
builder (3.2.3)
|
|
23
|
-
byebug (
|
|
24
|
-
childprocess (0.
|
|
25
|
-
|
|
23
|
+
byebug (11.0.1)
|
|
24
|
+
childprocess (1.0.1)
|
|
25
|
+
rake (< 13.0)
|
|
26
26
|
coderay (1.1.2)
|
|
27
27
|
contracts (0.16.0)
|
|
28
|
-
cucumber (3.1.
|
|
28
|
+
cucumber (3.1.2)
|
|
29
29
|
builder (>= 2.1.2)
|
|
30
|
-
cucumber-core (~> 3.
|
|
31
|
-
cucumber-expressions (~>
|
|
30
|
+
cucumber-core (~> 3.2.0)
|
|
31
|
+
cucumber-expressions (~> 6.0.1)
|
|
32
32
|
cucumber-wire (~> 0.0.1)
|
|
33
33
|
diff-lcs (~> 1.3)
|
|
34
|
-
gherkin (~> 5.0)
|
|
34
|
+
gherkin (~> 5.1.0)
|
|
35
35
|
multi_json (>= 1.7.5, < 2.0)
|
|
36
36
|
multi_test (>= 0.1.2)
|
|
37
|
-
cucumber-core (3.1
|
|
37
|
+
cucumber-core (3.2.1)
|
|
38
38
|
backports (>= 3.8.0)
|
|
39
39
|
cucumber-tag_expressions (~> 1.1.0)
|
|
40
|
-
gherkin (
|
|
41
|
-
cucumber-expressions (
|
|
40
|
+
gherkin (~> 5.0)
|
|
41
|
+
cucumber-expressions (6.0.1)
|
|
42
42
|
cucumber-tag_expressions (1.1.1)
|
|
43
43
|
cucumber-wire (0.0.1)
|
|
44
44
|
diff-lcs (1.3)
|
|
45
|
-
ffi (1.
|
|
45
|
+
ffi (1.10.0)
|
|
46
46
|
formatador (0.2.5)
|
|
47
47
|
gherkin (5.1.0)
|
|
48
|
-
guard (2.
|
|
48
|
+
guard (2.15.0)
|
|
49
49
|
formatador (>= 0.2.4)
|
|
50
50
|
listen (>= 2.7, < 4.0)
|
|
51
51
|
lumberjack (>= 1.0.12, < 2.0)
|
|
@@ -54,8 +54,8 @@ GEM
|
|
|
54
54
|
pry (>= 0.9.12)
|
|
55
55
|
shellany (~> 0.0)
|
|
56
56
|
thor (>= 0.18.1)
|
|
57
|
-
guard-bundler (2.1
|
|
58
|
-
bundler (
|
|
57
|
+
guard-bundler (2.2.1)
|
|
58
|
+
bundler (>= 1.3.0, < 3)
|
|
59
59
|
guard (~> 2.2)
|
|
60
60
|
guard-compat (~> 1.1)
|
|
61
61
|
guard-compat (1.2.1)
|
|
@@ -63,65 +63,67 @@ GEM
|
|
|
63
63
|
guard (~> 2.1)
|
|
64
64
|
guard-compat (~> 1.1)
|
|
65
65
|
rspec (>= 2.99.0, < 4.0)
|
|
66
|
+
jaro_winkler (1.5.2)
|
|
66
67
|
listen (3.1.5)
|
|
67
68
|
rb-fsevent (~> 0.9, >= 0.9.4)
|
|
68
69
|
rb-inotify (~> 0.9, >= 0.9.7)
|
|
69
70
|
ruby_dep (~> 1.2)
|
|
70
71
|
lumberjack (1.0.13)
|
|
71
|
-
method_source (0.9.
|
|
72
|
-
mini_portile2 (2.
|
|
72
|
+
method_source (0.9.2)
|
|
73
|
+
mini_portile2 (2.4.0)
|
|
73
74
|
multi_json (1.13.1)
|
|
74
75
|
multi_test (0.1.2)
|
|
75
76
|
nenv (0.3.0)
|
|
76
|
-
nokogiri (1.
|
|
77
|
-
mini_portile2 (~> 2.
|
|
77
|
+
nokogiri (1.10.2)
|
|
78
|
+
mini_portile2 (~> 2.4.0)
|
|
78
79
|
notiffany (0.1.1)
|
|
79
80
|
nenv (~> 0.1)
|
|
80
81
|
shellany (~> 0.0)
|
|
81
|
-
parallel (1.
|
|
82
|
-
parser (2.
|
|
82
|
+
parallel (1.17.0)
|
|
83
|
+
parser (2.6.2.1)
|
|
83
84
|
ast (~> 2.4.0)
|
|
84
|
-
|
|
85
|
-
pry (0.11.3)
|
|
85
|
+
pry (0.12.2)
|
|
86
86
|
coderay (~> 1.1.0)
|
|
87
87
|
method_source (~> 0.9.0)
|
|
88
|
-
pry-byebug (3.
|
|
89
|
-
byebug (~>
|
|
88
|
+
pry-byebug (3.7.0)
|
|
89
|
+
byebug (~> 11.0)
|
|
90
90
|
pry (~> 0.10)
|
|
91
|
-
|
|
92
|
-
|
|
91
|
+
psych (3.1.0)
|
|
92
|
+
public_suffix (3.0.3)
|
|
93
|
+
rack (2.0.7)
|
|
93
94
|
rainbow (3.0.0)
|
|
94
|
-
rake (12.3.
|
|
95
|
+
rake (12.3.2)
|
|
95
96
|
rb-fsevent (0.10.3)
|
|
96
|
-
rb-inotify (0.
|
|
97
|
-
ffi (
|
|
98
|
-
rspec (3.
|
|
99
|
-
rspec-core (~> 3.
|
|
100
|
-
rspec-expectations (~> 3.
|
|
101
|
-
rspec-mocks (~> 3.
|
|
102
|
-
rspec-core (3.
|
|
103
|
-
rspec-support (~> 3.
|
|
104
|
-
rspec-expectations (3.
|
|
97
|
+
rb-inotify (0.10.0)
|
|
98
|
+
ffi (~> 1.0)
|
|
99
|
+
rspec (3.8.0)
|
|
100
|
+
rspec-core (~> 3.8.0)
|
|
101
|
+
rspec-expectations (~> 3.8.0)
|
|
102
|
+
rspec-mocks (~> 3.8.0)
|
|
103
|
+
rspec-core (3.8.0)
|
|
104
|
+
rspec-support (~> 3.8.0)
|
|
105
|
+
rspec-expectations (3.8.2)
|
|
105
106
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
106
|
-
rspec-support (~> 3.
|
|
107
|
-
rspec-mocks (3.
|
|
107
|
+
rspec-support (~> 3.8.0)
|
|
108
|
+
rspec-mocks (3.8.0)
|
|
108
109
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
109
|
-
rspec-support (~> 3.
|
|
110
|
-
rspec-support (3.
|
|
111
|
-
rubocop (0.
|
|
110
|
+
rspec-support (~> 3.8.0)
|
|
111
|
+
rspec-support (3.8.0)
|
|
112
|
+
rubocop (0.67.2)
|
|
113
|
+
jaro_winkler (~> 1.5.1)
|
|
112
114
|
parallel (~> 1.10)
|
|
113
|
-
parser (>= 2.5)
|
|
114
|
-
|
|
115
|
+
parser (>= 2.5, != 2.5.1.1)
|
|
116
|
+
psych (>= 3.1.0)
|
|
115
117
|
rainbow (>= 2.2.2, < 4.0)
|
|
116
118
|
ruby-progressbar (~> 1.7)
|
|
117
|
-
unicode-display_width (
|
|
118
|
-
ruby-progressbar (1.
|
|
119
|
+
unicode-display_width (>= 1.4.0, < 1.6)
|
|
120
|
+
ruby-progressbar (1.10.0)
|
|
119
121
|
ruby_dep (1.5.0)
|
|
120
122
|
shellany (0.0.1)
|
|
121
|
-
stub_server (0.
|
|
123
|
+
stub_server (0.4.0)
|
|
122
124
|
rack
|
|
123
|
-
thor (0.20.
|
|
124
|
-
unicode-display_width (1.
|
|
125
|
+
thor (0.20.3)
|
|
126
|
+
unicode-display_width (1.5.0)
|
|
125
127
|
|
|
126
128
|
PLATFORMS
|
|
127
129
|
ruby
|
|
@@ -141,4 +143,4 @@ DEPENDENCIES
|
|
|
141
143
|
stub_server
|
|
142
144
|
|
|
143
145
|
BUNDLED WITH
|
|
144
|
-
1.
|
|
146
|
+
1.17.2
|
data/TODO.markdown
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
* Bail out if lang cannot be inferred and is not given on cmdline
|
|
2
|
+
* exe/httpspell: # TODO: --recursive, defaults to false
|
|
3
|
+
* exe/httpspell: # TODO wget has some additional options for recursive behavior that should be reviewed
|
|
4
|
+
* exe/httpspell: # TODO: Find sections with a lang attribute and handle them separately
|
|
5
|
+
* lib/httpspell/spider.rb: # TODO Print _which_ entry of the blacklist matches
|
|
6
|
+
* lib/httpspell/spider.rb: # TODO Ignore same page links (some anchor)
|
data/lib/httpspell/spider.rb
CHANGED
|
@@ -46,7 +46,7 @@ module HttpSpell
|
|
|
46
46
|
private
|
|
47
47
|
|
|
48
48
|
def links(uri)
|
|
49
|
-
response = URI(uri)
|
|
49
|
+
response = http_get(URI(uri))
|
|
50
50
|
|
|
51
51
|
if response.content_type != 'text/html'
|
|
52
52
|
warn "Skipping #{uri} because it is not HTML" if @tracing
|
|
@@ -82,5 +82,18 @@ module HttpSpell
|
|
|
82
82
|
warn "Adding #{links.size} links from #{uri}" if @tracing
|
|
83
83
|
links
|
|
84
84
|
end
|
|
85
|
+
|
|
86
|
+
# https://twin.github.io/improving-open-uri/
|
|
87
|
+
def http_get(uri)
|
|
88
|
+
tries = 10
|
|
89
|
+
|
|
90
|
+
begin
|
|
91
|
+
uri.open(redirect: false)
|
|
92
|
+
rescue OpenURI::HTTPRedirect => redirect
|
|
93
|
+
uri = redirect.uri
|
|
94
|
+
retry if (tries -= 1) > 0
|
|
95
|
+
raise
|
|
96
|
+
end
|
|
97
|
+
end
|
|
85
98
|
end
|
|
86
99
|
end
|
data/lib/httpspell/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: httpspell
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.2.1
|
|
4
|
+
version: 1.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steffen Uhlig
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2019-04-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: addressable
|
|
@@ -211,6 +211,7 @@ files:
|
|
|
211
211
|
- Gemfile.lock
|
|
212
212
|
- README.markdown
|
|
213
213
|
- Rakefile
|
|
214
|
+
- TODO.markdown
|
|
214
215
|
- exe/httpspell
|
|
215
216
|
- httpspell.gemspec
|
|
216
217
|
- lib/httpspell/spellchecker.rb
|
|
@@ -235,8 +236,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
235
236
|
- !ruby/object:Gem::Version
|
|
236
237
|
version: '0'
|
|
237
238
|
requirements: []
|
|
238
|
-
|
|
239
|
-
rubygems_version: 2.7.6
|
|
239
|
+
rubygems_version: 3.0.1
|
|
240
240
|
signing_key:
|
|
241
241
|
specification_version: 4
|
|
242
242
|
summary: HTTP spellchecker
|