metainspector 5.13.1 → 5.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +11 -1
- data/CHANGELOG.md +3 -3
- data/Gemfile.lock +51 -30
- data/README.md +22 -5
- data/examples/faraday_redirect_options.rb +42 -0
- data/examples/redirect_web_server.rb +5 -0
- data/lib/meta_inspector/request.rb +6 -2
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +7 -4
- data/spec/document_spec.rb +2 -2
- data/spec/meta_inspector/links_spec.rb +1 -1
- data/spec/meta_inspector/redirections_spec.rb +22 -0
- data/spec/spec_helper.rb +2 -1
- metadata +60 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5098979bb849154a0fde8f74483401c6fac1d07aec69ce4b1cc8c4abc4d9eb3d
|
|
4
|
+
data.tar.gz: 7c862c95f5b1a10faa25bf5346e2b3c6e26114fabf787e1ec061caa8f70f06d9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 21ca7857d9cd32d2e0b5bbd2f29af1438233174d008b1d7b68df324c25da3ff8ca5b86f496cccc1c15700360e17a4f0318021483e41fd1d8d6ec3fc35e0479bc
|
|
7
|
+
data.tar.gz: 84cf704542d0ca037ce46f9fbbca4f6c354928c5d50dbf56f22e6394413aabe087ed97ca51ad9de9279e7a621f4b38972370dd29b1a1a76645ef833e2af6f3b3
|
data/.circleci/config.yml
CHANGED
|
@@ -38,6 +38,15 @@ jobs:
|
|
|
38
38
|
- run:
|
|
39
39
|
name: Run tests
|
|
40
40
|
command: bundle exec rake
|
|
41
|
+
test_3_2:
|
|
42
|
+
docker:
|
|
43
|
+
- image: cimg/ruby:3.2.1
|
|
44
|
+
steps:
|
|
45
|
+
- checkout
|
|
46
|
+
- ruby/install-deps
|
|
47
|
+
- run:
|
|
48
|
+
name: Run tests
|
|
49
|
+
command: bundle exec rake
|
|
41
50
|
workflows:
|
|
42
51
|
version: 2
|
|
43
52
|
deploy:
|
|
@@ -45,4 +54,5 @@ workflows:
|
|
|
45
54
|
- test_2_6
|
|
46
55
|
- test_2_7
|
|
47
56
|
- test_3_0
|
|
48
|
-
- test_3_1
|
|
57
|
+
- test_3_1
|
|
58
|
+
- test_3_2
|
data/CHANGELOG.md
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
# MetaInspector Changelog
|
|
2
2
|
|
|
3
|
-
## [Changes in 5.13.0](https://github.com/
|
|
3
|
+
## [Changes in 5.13.0](https://github.com/jaimeiniesta/metainspector/compare/v5.12.1...v5.13.0)
|
|
4
4
|
|
|
5
5
|
* Remove support for #feed that was deprecated in 5.9
|
|
6
6
|
* Add support for Ruby 3.1
|
|
7
7
|
|
|
8
|
-
## [Changes in 5.12.1](https://github.com/
|
|
8
|
+
## [Changes in 5.12.1](https://github.com/jaimeiniesta/metainspector/compare/v5.12.0...v5.12.1)
|
|
9
9
|
|
|
10
10
|
* Update dependencies: rubocop, nokogiri
|
|
11
11
|
|
|
12
|
-
## [Changes in 5.12.0](https://github.com/
|
|
12
|
+
## [Changes in 5.12.0](https://github.com/jaimeiniesta/metainspector/compare/v5.11.2...v5.12.0)
|
|
13
13
|
|
|
14
14
|
* Support Ruby 3.0
|
|
15
15
|
|
data/Gemfile.lock
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
metainspector (5.
|
|
5
|
-
addressable (~> 2.8)
|
|
4
|
+
metainspector (5.15.0)
|
|
5
|
+
addressable (~> 2.8.4)
|
|
6
6
|
faraday (~> 2.5)
|
|
7
7
|
faraday-cookie_jar (~> 0.0)
|
|
8
8
|
faraday-encoding (~> 0.0)
|
|
9
9
|
faraday-follow_redirects (~> 0.3)
|
|
10
|
-
faraday-gzip (
|
|
11
|
-
faraday-http-cache (~> 2.
|
|
10
|
+
faraday-gzip (>= 0.1, < 2.0)
|
|
11
|
+
faraday-http-cache (~> 2.5)
|
|
12
12
|
faraday-retry (~> 2.0)
|
|
13
13
|
fastimage (~> 2.2)
|
|
14
14
|
nesty (~> 1.0)
|
|
@@ -17,7 +17,7 @@ PATH
|
|
|
17
17
|
GEM
|
|
18
18
|
remote: http://rubygems.org/
|
|
19
19
|
specs:
|
|
20
|
-
addressable (2.8.
|
|
20
|
+
addressable (2.8.4)
|
|
21
21
|
public_suffix (>= 2.0.2, < 6.0)
|
|
22
22
|
ast (2.4.2)
|
|
23
23
|
awesome_print (1.9.2)
|
|
@@ -27,7 +27,7 @@ GEM
|
|
|
27
27
|
diff-lcs (1.5.0)
|
|
28
28
|
domain_name (0.5.20190701)
|
|
29
29
|
unf (>= 0.0.5, < 1.0.0)
|
|
30
|
-
faraday (2.7.
|
|
30
|
+
faraday (2.7.4)
|
|
31
31
|
faraday-net_http (>= 2.0, < 3.1)
|
|
32
32
|
ruby2_keywords (>= 0.0.4)
|
|
33
33
|
faraday-cookie_jar (0.0.7)
|
|
@@ -37,68 +37,85 @@ GEM
|
|
|
37
37
|
faraday
|
|
38
38
|
faraday-follow_redirects (0.3.0)
|
|
39
39
|
faraday (>= 1, < 3)
|
|
40
|
-
faraday-gzip (
|
|
40
|
+
faraday-gzip (1.0.0)
|
|
41
41
|
faraday (>= 1.0)
|
|
42
42
|
zlib (~> 2.1)
|
|
43
|
-
faraday-http-cache (2.
|
|
43
|
+
faraday-http-cache (2.5.0)
|
|
44
44
|
faraday (>= 0.8)
|
|
45
45
|
faraday-net_http (3.0.2)
|
|
46
|
-
faraday-retry (2.
|
|
46
|
+
faraday-retry (2.1.0)
|
|
47
47
|
faraday (~> 2.0)
|
|
48
48
|
fastimage (2.2.6)
|
|
49
49
|
hashdiff (1.0.1)
|
|
50
50
|
http-cookie (1.0.5)
|
|
51
51
|
domain_name (~> 0.5)
|
|
52
|
-
json (2.6.
|
|
52
|
+
json (2.6.3)
|
|
53
53
|
method_source (1.0.0)
|
|
54
|
-
mini_portile2 (2.8.
|
|
54
|
+
mini_portile2 (2.8.2)
|
|
55
|
+
mustermann (3.0.0)
|
|
56
|
+
ruby2_keywords (~> 0.0.1)
|
|
55
57
|
nesty (1.0.2)
|
|
56
|
-
|
|
58
|
+
nio4r (2.5.9)
|
|
59
|
+
nokogiri (1.14.4)
|
|
57
60
|
mini_portile2 (~> 2.8.0)
|
|
58
61
|
racc (~> 1.4)
|
|
59
|
-
|
|
60
|
-
|
|
62
|
+
nokogiri (1.14.4-arm64-darwin)
|
|
63
|
+
racc (~> 1.4)
|
|
64
|
+
parallel (1.23.0)
|
|
65
|
+
parser (3.2.2.1)
|
|
61
66
|
ast (~> 2.4.1)
|
|
62
|
-
pry (0.14.
|
|
67
|
+
pry (0.14.2)
|
|
63
68
|
coderay (~> 1.1)
|
|
64
69
|
method_source (~> 1.0)
|
|
65
|
-
public_suffix (5.0.
|
|
66
|
-
|
|
70
|
+
public_suffix (5.0.1)
|
|
71
|
+
puma (6.2.2)
|
|
72
|
+
nio4r (~> 2.0)
|
|
73
|
+
racc (1.6.2)
|
|
74
|
+
rack (2.2.7)
|
|
75
|
+
rack-protection (3.0.6)
|
|
76
|
+
rack
|
|
67
77
|
rainbow (3.1.1)
|
|
68
78
|
rake (13.0.6)
|
|
69
|
-
regexp_parser (2.
|
|
79
|
+
regexp_parser (2.8.0)
|
|
80
|
+
resolv (0.2.2)
|
|
70
81
|
rexml (3.2.5)
|
|
71
82
|
rspec (3.12.0)
|
|
72
83
|
rspec-core (~> 3.12.0)
|
|
73
84
|
rspec-expectations (~> 3.12.0)
|
|
74
85
|
rspec-mocks (~> 3.12.0)
|
|
75
|
-
rspec-core (3.12.
|
|
86
|
+
rspec-core (3.12.1)
|
|
76
87
|
rspec-support (~> 3.12.0)
|
|
77
|
-
rspec-expectations (3.12.
|
|
88
|
+
rspec-expectations (3.12.2)
|
|
78
89
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
79
90
|
rspec-support (~> 3.12.0)
|
|
80
|
-
rspec-mocks (3.12.
|
|
91
|
+
rspec-mocks (3.12.3)
|
|
81
92
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
82
93
|
rspec-support (~> 3.12.0)
|
|
83
94
|
rspec-support (3.12.0)
|
|
84
|
-
rubocop (1.
|
|
95
|
+
rubocop (1.51.0)
|
|
85
96
|
json (~> 2.3)
|
|
86
97
|
parallel (~> 1.10)
|
|
87
|
-
parser (>= 3.
|
|
98
|
+
parser (>= 3.2.0.0)
|
|
88
99
|
rainbow (>= 2.2.2, < 4.0)
|
|
89
100
|
regexp_parser (>= 1.8, < 3.0)
|
|
90
101
|
rexml (>= 3.2.5, < 4.0)
|
|
91
|
-
rubocop-ast (>= 1.
|
|
102
|
+
rubocop-ast (>= 1.28.0, < 2.0)
|
|
92
103
|
ruby-progressbar (~> 1.7)
|
|
93
|
-
unicode-display_width (>=
|
|
94
|
-
rubocop-ast (1.
|
|
95
|
-
parser (>= 3.
|
|
96
|
-
ruby-progressbar (1.
|
|
104
|
+
unicode-display_width (>= 2.4.0, < 3.0)
|
|
105
|
+
rubocop-ast (1.28.1)
|
|
106
|
+
parser (>= 3.2.1.0)
|
|
107
|
+
ruby-progressbar (1.13.0)
|
|
97
108
|
ruby2_keywords (0.0.5)
|
|
109
|
+
sinatra (3.0.6)
|
|
110
|
+
mustermann (~> 3.0)
|
|
111
|
+
rack (~> 2.2, >= 2.2.4)
|
|
112
|
+
rack-protection (= 3.0.6)
|
|
113
|
+
tilt (~> 2.0)
|
|
114
|
+
tilt (2.1.0)
|
|
98
115
|
unf (0.1.4)
|
|
99
116
|
unf_ext
|
|
100
117
|
unf_ext (0.0.8.2)
|
|
101
|
-
unicode-display_width (2.
|
|
118
|
+
unicode-display_width (2.4.2)
|
|
102
119
|
webmock (3.18.1)
|
|
103
120
|
addressable (>= 2.8.0)
|
|
104
121
|
crack (>= 0.3.2)
|
|
@@ -106,15 +123,19 @@ GEM
|
|
|
106
123
|
zlib (2.1.1)
|
|
107
124
|
|
|
108
125
|
PLATFORMS
|
|
109
|
-
|
|
126
|
+
arm64-darwin-22
|
|
127
|
+
x86_64-linux
|
|
110
128
|
|
|
111
129
|
DEPENDENCIES
|
|
112
130
|
awesome_print (~> 1.9)
|
|
113
131
|
metainspector!
|
|
114
132
|
pry (~> 0.14)
|
|
133
|
+
puma (~> 6.2.2)
|
|
115
134
|
rake (~> 13.0)
|
|
135
|
+
resolv (~> 0.2.2)
|
|
116
136
|
rspec (~> 3.11)
|
|
117
137
|
rubocop (~> 1.34)
|
|
138
|
+
sinatra (~> 3.0.6)
|
|
118
139
|
webmock (~> 3.17)
|
|
119
140
|
|
|
120
141
|
BUNDLED WITH
|
data/README.md
CHANGED
|
@@ -1,14 +1,10 @@
|
|
|
1
1
|
# MetaInspector
|
|
2
|
-
[](http://badge.fury.io/rb/metainspector) [](http://badge.fury.io/rb/metainspector) [](https://circleci.com/gh/jaimeiniesta/metainspector) [](https://codeclimate.com/github/jaimeiniesta/metainspector) [](https://github.com/markets/awesome-ruby)
|
|
3
3
|
|
|
4
4
|
MetaInspector is a gem for web scraping purposes.
|
|
5
5
|
|
|
6
6
|
You give it a URL, and it lets you easily get its title, links, images, charset, description, keywords, meta tags...
|
|
7
7
|
|
|
8
|
-
## See it in action!
|
|
9
|
-
|
|
10
|
-
You can try MetaInspector using this little demo: [https://github.com/metainspector/metainspectordemo](https://github.com/metainspector/metainspectordemo)
|
|
11
|
-
|
|
12
8
|
## Installation
|
|
13
9
|
|
|
14
10
|
Install the gem from RubyGems:
|
|
@@ -309,6 +305,27 @@ If you want to disallow redirects, you can do it like this:
|
|
|
309
305
|
page = MetaInspector.new('facebook.com', :allow_redirections => false)
|
|
310
306
|
```
|
|
311
307
|
|
|
308
|
+
You can also customize how many redirects you wish to allow:
|
|
309
|
+
|
|
310
|
+
```ruby
|
|
311
|
+
page = MetaInspector.new('facebook.com', :faraday_options => { redirect: { limit: 5 } })
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
And even customize what to do in between each redirect:
|
|
315
|
+
|
|
316
|
+
```ruby
|
|
317
|
+
callback = proc do |previous_response, next_request|
|
|
318
|
+
ip_address = Resolv.getaddress(next_request.url.host)
|
|
319
|
+
raise 'Invalid address' if IPAddr.new(ip_address).private?
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
page = MetaInspector.new(url, faraday_options: { redirect: { callback: callback } })
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
The `faraday_options[:redirect]` hash is passed to the `FollowRedirects` middleware used by `Faraday`, so that we can use all available options.
|
|
327
|
+
Check them [here](https://github.com/lostisland/faraday_middleware/blob/main/lib/faraday_middleware/response/follow_redirects.rb#L44).
|
|
328
|
+
|
|
312
329
|
### Headers
|
|
313
330
|
|
|
314
331
|
By default, the following headers are set:
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# A MetaInspector example that runs a callback in between redirects.
|
|
2
|
+
# The callback raises an exception if the redirection points to a URL that resolves into a private IP address.
|
|
3
|
+
# Redirecting to a private IP address is one way of triggering a known security exploit called server-side request forgery (SSRF).
|
|
4
|
+
#
|
|
5
|
+
# To properly run this example you need a server which redirects to a service like nip.io.
|
|
6
|
+
# The easiest way to achieve that is running the examples/redirect_web_server.rb server in one terminal window,
|
|
7
|
+
# and calling its address with this example in another terminal window.
|
|
8
|
+
#
|
|
9
|
+
# Usage example:
|
|
10
|
+
# In terminal #1:
|
|
11
|
+
# ruby redirect_web_server.rb
|
|
12
|
+
#
|
|
13
|
+
# In terminal #2:
|
|
14
|
+
# ruby faraday_redirect_options.rb http://127.0.0.1:4567
|
|
15
|
+
|
|
16
|
+
require 'resolv'
|
|
17
|
+
require '../lib/metainspector'
|
|
18
|
+
puts "Using MetaInspector #{MetaInspector::VERSION}"
|
|
19
|
+
|
|
20
|
+
# Get the starting URL
|
|
21
|
+
url = ARGV[0] || (puts "Enter an url"; gets.strip)
|
|
22
|
+
|
|
23
|
+
# redirect options to be passed along to Faraday::FollowRedirects::Middleware
|
|
24
|
+
redirects_opts = {
|
|
25
|
+
limit: 5,
|
|
26
|
+
callback: proc do |_old_response, new_response|
|
|
27
|
+
ip_address = Resolv.getaddress(new_response.url.host)
|
|
28
|
+
raise 'Invalid address' if IPAddr.new(ip_address).private?
|
|
29
|
+
end
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
begin
|
|
33
|
+
page = MetaInspector.new(url, faraday_options: { redirect: redirects_opts })
|
|
34
|
+
rescue StandardError => e
|
|
35
|
+
puts e.message
|
|
36
|
+
else
|
|
37
|
+
puts "\nScraping #{page.url} returned these results:"
|
|
38
|
+
puts "\nTITLE: #{page.title}"
|
|
39
|
+
|
|
40
|
+
puts "\nto_hash..."
|
|
41
|
+
puts page.to_hash
|
|
42
|
+
end
|
|
@@ -58,6 +58,7 @@ module MetaInspector
|
|
|
58
58
|
def fetch
|
|
59
59
|
Timeout::timeout(fatal_timeout) do
|
|
60
60
|
@faraday_options.merge!(:url => url)
|
|
61
|
+
follow_redirects_options = @faraday_options.delete(:redirect) || {}
|
|
61
62
|
|
|
62
63
|
session = Faraday.new(@faraday_options) do |faraday|
|
|
63
64
|
faraday.request :retry, max: @retries
|
|
@@ -65,7 +66,8 @@ module MetaInspector
|
|
|
65
66
|
faraday.request :gzip
|
|
66
67
|
|
|
67
68
|
if @allow_redirections
|
|
68
|
-
|
|
69
|
+
follow_redirects_options[:limit] ||= 10
|
|
70
|
+
faraday.use Faraday::FollowRedirects::Middleware, **follow_redirects_options
|
|
69
71
|
faraday.use :cookie_jar
|
|
70
72
|
end
|
|
71
73
|
|
|
@@ -84,7 +86,9 @@ module MetaInspector
|
|
|
84
86
|
req.options.open_timeout = @read_timeout
|
|
85
87
|
end
|
|
86
88
|
|
|
87
|
-
@
|
|
89
|
+
if @allow_redirections
|
|
90
|
+
@url.url = response.env.url.to_s
|
|
91
|
+
end
|
|
88
92
|
|
|
89
93
|
response
|
|
90
94
|
end
|
data/meta_inspector.gemspec
CHANGED
|
@@ -5,7 +5,7 @@ Gem::Specification.new do |gem|
|
|
|
5
5
|
gem.email = "jaimeiniesta@gmail.com"
|
|
6
6
|
gem.description = %q{MetaInspector lets you scrape a web page and get its links, images, texts, meta tags...}
|
|
7
7
|
gem.summary = %q{MetaInspector is a ruby gem for web scraping purposes, that returns metadata from a given URL}
|
|
8
|
-
gem.homepage = "https://github.com/
|
|
8
|
+
gem.homepage = "https://github.com/jaimeiniesta/metainspector"
|
|
9
9
|
gem.license = "MIT"
|
|
10
10
|
|
|
11
11
|
gem.files = `git ls-files`.split("\n")
|
|
@@ -19,10 +19,10 @@ Gem::Specification.new do |gem|
|
|
|
19
19
|
gem.add_dependency 'faraday-cookie_jar', '~> 0.0'
|
|
20
20
|
gem.add_dependency 'faraday-encoding', '~> 0.0'
|
|
21
21
|
gem.add_dependency 'faraday-follow_redirects', '~> 0.3'
|
|
22
|
-
gem.add_dependency 'faraday-gzip', '
|
|
23
|
-
gem.add_dependency 'faraday-http-cache', '~> 2.
|
|
22
|
+
gem.add_dependency 'faraday-gzip', '>= 0.1', '< 2.0'
|
|
23
|
+
gem.add_dependency 'faraday-http-cache', '~> 2.5'
|
|
24
24
|
gem.add_dependency 'faraday-retry', '~> 2.0'
|
|
25
|
-
gem.add_dependency 'addressable', '~> 2.8'
|
|
25
|
+
gem.add_dependency 'addressable', '~> 2.8.4'
|
|
26
26
|
gem.add_dependency 'fastimage', '~> 2.2'
|
|
27
27
|
gem.add_dependency 'nesty', '~> 1.0'
|
|
28
28
|
|
|
@@ -31,5 +31,8 @@ Gem::Specification.new do |gem|
|
|
|
31
31
|
gem.add_development_dependency 'awesome_print', '~> 1.9'
|
|
32
32
|
gem.add_development_dependency 'rake', '~> 13.0'
|
|
33
33
|
gem.add_development_dependency 'pry', '~> 0.14'
|
|
34
|
+
gem.add_development_dependency 'puma', '~> 6.2.2'
|
|
34
35
|
gem.add_development_dependency 'rubocop', '~> 1.34'
|
|
36
|
+
gem.add_development_dependency 'resolv', '~> 0.2.2'
|
|
37
|
+
gem.add_development_dependency 'sinatra', '~> 3.0.6'
|
|
35
38
|
end
|
data/spec/document_spec.rb
CHANGED
|
@@ -130,11 +130,11 @@ describe MetaInspector::Document do
|
|
|
130
130
|
|
|
131
131
|
describe 'url normalization' do
|
|
132
132
|
it 'should normalize by default' do
|
|
133
|
-
expect(MetaInspector.new('http://example.com
|
|
133
|
+
expect(MetaInspector.new('http://example.com?name=joe martins', allow_redirections: false).url).to eq('http://example.com/?name=joe%20martins')
|
|
134
134
|
end
|
|
135
135
|
|
|
136
136
|
it 'should not normalize if the normalize_url option is false' do
|
|
137
|
-
expect(MetaInspector.new('http://example.com
|
|
137
|
+
expect(MetaInspector.new('http://example.com?name=joe martins', normalize_url: false, allow_redirections: false).url).to eq('http://example.com?name=joe martins')
|
|
138
138
|
end
|
|
139
139
|
end
|
|
140
140
|
|
|
@@ -108,7 +108,7 @@ describe MetaInspector do
|
|
|
108
108
|
|
|
109
109
|
it "should handle links that have an invalid byte sequence" do
|
|
110
110
|
m = MetaInspector.new('http://example.com/invalid_byte_seq')
|
|
111
|
-
expect(m.links.all).to eq(["http://pagerankalert.posterous.com/", "http://
|
|
111
|
+
expect(m.links.all).to eq(["http://pagerankalert.posterous.com/", "http://twitter.com/pagerankalert"])
|
|
112
112
|
end
|
|
113
113
|
|
|
114
114
|
end
|
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
require 'spec_helper'
|
|
2
|
+
require 'resolv'
|
|
3
|
+
|
|
4
|
+
class PrivateIPAddressError < StandardError; end
|
|
2
5
|
|
|
3
6
|
describe MetaInspector do
|
|
4
7
|
describe "redirections" do
|
|
@@ -47,6 +50,25 @@ describe MetaInspector do
|
|
|
47
50
|
expect(page.url).to eq("http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1")
|
|
48
51
|
end
|
|
49
52
|
end
|
|
53
|
+
|
|
54
|
+
context "when there is a callback to be ran between redirects that blocks redirections to private IP addresses" do
|
|
55
|
+
it "raises an exception" do
|
|
56
|
+
stub_request(:get, "https://www.facebook.com/")
|
|
57
|
+
.to_return(:status => 302,
|
|
58
|
+
:headers => { "Location" => "http://10.0.0.0/" })
|
|
59
|
+
|
|
60
|
+
redirect_options = {
|
|
61
|
+
callback: proc do |_previous_response, next_request|
|
|
62
|
+
ip_address = Resolv.getaddress(next_request.url.host)
|
|
63
|
+
raise PrivateIPAddressError if IPAddr.new(ip_address).private?
|
|
64
|
+
end
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
expect {
|
|
68
|
+
MetaInspector.new("https://www.facebook.com/", faraday_options: { redirect: redirect_options })
|
|
69
|
+
}.to raise_error PrivateIPAddressError
|
|
70
|
+
end
|
|
71
|
+
end
|
|
50
72
|
end
|
|
51
73
|
|
|
52
74
|
private
|
data/spec/spec_helper.rb
CHANGED
|
@@ -50,7 +50,8 @@ RSpec.configure do |config|
|
|
|
50
50
|
stub_request(:get, "http://example.com/author_in_body").to_return(fixture_file("author_in_body.response"))
|
|
51
51
|
stub_request(:get, "http://example.com/author_in_link").to_return(fixture_file("author_in_link.response"))
|
|
52
52
|
stub_request(:get, "http://example.com/author_in_twitter").to_return(fixture_file("author_in_twitter.response"))
|
|
53
|
-
stub_request(:get, "http://example.com
|
|
53
|
+
stub_request(:get, "http://example.com/?name=joe martins").to_return(fixture_file("example.response"))
|
|
54
|
+
stub_request(:get, "http://example.com/?name=joe+martins").to_return(fixture_file("example.response"))
|
|
54
55
|
stub_request(:get, "http://facebook.com/").to_return(fixture_file("facebook.com.response"))
|
|
55
56
|
stub_request(:get, "http://international.com").to_return(fixture_file("international.response"))
|
|
56
57
|
stub_request(:get, "http://pagerankalert-shortcut-and-icon.com").to_return(fixture_file("pagerankalert-shortcut-and-icon.com.response"))
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: metainspector
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 5.
|
|
4
|
+
version: 5.15.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jaime Iniesta
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2023-05-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -84,30 +84,36 @@ dependencies:
|
|
|
84
84
|
name: faraday-gzip
|
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
|
86
86
|
requirements:
|
|
87
|
-
- - "
|
|
87
|
+
- - ">="
|
|
88
88
|
- !ruby/object:Gem::Version
|
|
89
89
|
version: '0.1'
|
|
90
|
+
- - "<"
|
|
91
|
+
- !ruby/object:Gem::Version
|
|
92
|
+
version: '2.0'
|
|
90
93
|
type: :runtime
|
|
91
94
|
prerelease: false
|
|
92
95
|
version_requirements: !ruby/object:Gem::Requirement
|
|
93
96
|
requirements:
|
|
94
|
-
- - "
|
|
97
|
+
- - ">="
|
|
95
98
|
- !ruby/object:Gem::Version
|
|
96
99
|
version: '0.1'
|
|
100
|
+
- - "<"
|
|
101
|
+
- !ruby/object:Gem::Version
|
|
102
|
+
version: '2.0'
|
|
97
103
|
- !ruby/object:Gem::Dependency
|
|
98
104
|
name: faraday-http-cache
|
|
99
105
|
requirement: !ruby/object:Gem::Requirement
|
|
100
106
|
requirements:
|
|
101
107
|
- - "~>"
|
|
102
108
|
- !ruby/object:Gem::Version
|
|
103
|
-
version: '2.
|
|
109
|
+
version: '2.5'
|
|
104
110
|
type: :runtime
|
|
105
111
|
prerelease: false
|
|
106
112
|
version_requirements: !ruby/object:Gem::Requirement
|
|
107
113
|
requirements:
|
|
108
114
|
- - "~>"
|
|
109
115
|
- !ruby/object:Gem::Version
|
|
110
|
-
version: '2.
|
|
116
|
+
version: '2.5'
|
|
111
117
|
- !ruby/object:Gem::Dependency
|
|
112
118
|
name: faraday-retry
|
|
113
119
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -128,14 +134,14 @@ dependencies:
|
|
|
128
134
|
requirements:
|
|
129
135
|
- - "~>"
|
|
130
136
|
- !ruby/object:Gem::Version
|
|
131
|
-
version:
|
|
137
|
+
version: 2.8.4
|
|
132
138
|
type: :runtime
|
|
133
139
|
prerelease: false
|
|
134
140
|
version_requirements: !ruby/object:Gem::Requirement
|
|
135
141
|
requirements:
|
|
136
142
|
- - "~>"
|
|
137
143
|
- !ruby/object:Gem::Version
|
|
138
|
-
version:
|
|
144
|
+
version: 2.8.4
|
|
139
145
|
- !ruby/object:Gem::Dependency
|
|
140
146
|
name: fastimage
|
|
141
147
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -234,6 +240,20 @@ dependencies:
|
|
|
234
240
|
- - "~>"
|
|
235
241
|
- !ruby/object:Gem::Version
|
|
236
242
|
version: '0.14'
|
|
243
|
+
- !ruby/object:Gem::Dependency
|
|
244
|
+
name: puma
|
|
245
|
+
requirement: !ruby/object:Gem::Requirement
|
|
246
|
+
requirements:
|
|
247
|
+
- - "~>"
|
|
248
|
+
- !ruby/object:Gem::Version
|
|
249
|
+
version: 6.2.2
|
|
250
|
+
type: :development
|
|
251
|
+
prerelease: false
|
|
252
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
253
|
+
requirements:
|
|
254
|
+
- - "~>"
|
|
255
|
+
- !ruby/object:Gem::Version
|
|
256
|
+
version: 6.2.2
|
|
237
257
|
- !ruby/object:Gem::Dependency
|
|
238
258
|
name: rubocop
|
|
239
259
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -248,6 +268,34 @@ dependencies:
|
|
|
248
268
|
- - "~>"
|
|
249
269
|
- !ruby/object:Gem::Version
|
|
250
270
|
version: '1.34'
|
|
271
|
+
- !ruby/object:Gem::Dependency
|
|
272
|
+
name: resolv
|
|
273
|
+
requirement: !ruby/object:Gem::Requirement
|
|
274
|
+
requirements:
|
|
275
|
+
- - "~>"
|
|
276
|
+
- !ruby/object:Gem::Version
|
|
277
|
+
version: 0.2.2
|
|
278
|
+
type: :development
|
|
279
|
+
prerelease: false
|
|
280
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
281
|
+
requirements:
|
|
282
|
+
- - "~>"
|
|
283
|
+
- !ruby/object:Gem::Version
|
|
284
|
+
version: 0.2.2
|
|
285
|
+
- !ruby/object:Gem::Dependency
|
|
286
|
+
name: sinatra
|
|
287
|
+
requirement: !ruby/object:Gem::Requirement
|
|
288
|
+
requirements:
|
|
289
|
+
- - "~>"
|
|
290
|
+
- !ruby/object:Gem::Version
|
|
291
|
+
version: 3.0.6
|
|
292
|
+
type: :development
|
|
293
|
+
prerelease: false
|
|
294
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
295
|
+
requirements:
|
|
296
|
+
- - "~>"
|
|
297
|
+
- !ruby/object:Gem::Version
|
|
298
|
+
version: 3.0.6
|
|
251
299
|
description: MetaInspector lets you scrape a web page and get its links, images, texts,
|
|
252
300
|
meta tags...
|
|
253
301
|
email: jaimeiniesta@gmail.com
|
|
@@ -269,7 +317,9 @@ files:
|
|
|
269
317
|
- Rakefile
|
|
270
318
|
- bin/console
|
|
271
319
|
- examples/basic_scraping.rb
|
|
320
|
+
- examples/faraday_redirect_options.rb
|
|
272
321
|
- examples/link_checker.rb
|
|
322
|
+
- examples/redirect_web_server.rb
|
|
273
323
|
- examples/spider.rb
|
|
274
324
|
- lib/meta_inspector.rb
|
|
275
325
|
- lib/meta_inspector/document.rb
|
|
@@ -363,7 +413,7 @@ files:
|
|
|
363
413
|
- spec/request_spec.rb
|
|
364
414
|
- spec/spec_helper.rb
|
|
365
415
|
- spec/url_spec.rb
|
|
366
|
-
homepage: https://github.com/
|
|
416
|
+
homepage: https://github.com/jaimeiniesta/metainspector
|
|
367
417
|
licenses:
|
|
368
418
|
- MIT
|
|
369
419
|
metadata: {}
|
|
@@ -382,7 +432,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
382
432
|
- !ruby/object:Gem::Version
|
|
383
433
|
version: '0'
|
|
384
434
|
requirements: []
|
|
385
|
-
rubygems_version: 3.
|
|
435
|
+
rubygems_version: 3.4.12
|
|
386
436
|
signing_key:
|
|
387
437
|
specification_version: 4
|
|
388
438
|
summary: MetaInspector is a ruby gem for web scraping purposes, that returns metadata
|