webinspector 0.4.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile +2 -0
- data/README.md +58 -28
- data/Rakefile +2 -1
- data/bin/console +4 -3
- data/lib/web_inspector/inspector.rb +192 -83
- data/lib/web_inspector/meta.rb +36 -15
- data/lib/web_inspector/page.rb +145 -61
- data/lib/web_inspector/request.rb +10 -8
- data/lib/web_inspector/version.rb +3 -1
- data/lib/web_inspector.rb +4 -2
- data/lib/webinspector.rb +3 -1
- data/webinspector.gemspec +33 -26
- metadata +103 -60
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 0413d3ff948ab6efff6a1cbe8a7844287149ad06f09353655e6cb208968f9481
|
4
|
+
data.tar.gz: 152b950595afb57adc522da24c6959f71d160ba903b3d01ce6ee5f6a8b4d81d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c6230493b59a0d23585be729ec98706cfdbd6852e2de2d65db83d1638f85110369d41f2275b8e0aa09b58008d53924d036840ce63523d9004f19275999be90f8
|
7
|
+
data.tar.gz: dad518b0b04c1e341c14c29438ebcf84f4602bf394254a4c61382ad79ce53dae2ac92f138f331d5bf089d2220011f14b0cde0a79608a91177b2bda2ab1773a96
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
#
|
1
|
+
# WebInspector
|
2
2
|
|
3
|
-
Ruby gem to inspect
|
3
|
+
Ruby gem to inspect web pages. It scrapes a given URL and returns its title, description, meta tags, links, images, and more.
|
4
|
+
|
5
|
+
<a href="https://codeclimate.com/github/davidesantangelo/webinspector"><img src="https://codeclimate.com/github/davidesantangelo/webinspector/badges/gpa.svg" /></a>
|
4
6
|
|
5
|
-
## See it in action!
|
6
7
|
|
7
|
-
You can try WebInspector live at this little demo: [https://scrappet.herokuapp.com](https://scrappet.herokuapp.com)
|
8
8
|
## Installation
|
9
9
|
|
10
10
|
Add this line to your application's Gemfile:
|
@@ -23,58 +23,88 @@ Or install it yourself as:
|
|
23
23
|
|
24
24
|
## Usage
|
25
25
|
|
26
|
-
Initialize a WebInspector instance
|
26
|
+
### Initialize a WebInspector instance
|
27
27
|
|
28
28
|
```ruby
|
29
|
-
page = WebInspector.new('http://
|
29
|
+
page = WebInspector.new('http://example.com')
|
30
30
|
```
|
31
31
|
|
32
|
-
|
32
|
+
### With options
|
33
33
|
|
34
|
-
|
34
|
+
```ruby
|
35
|
+
page = WebInspector.new('http://example.com', {
|
36
|
+
timeout: 30, # Request timeout in seconds (default: 30)
|
37
|
+
retries: 3, # Number of retries (default: 3)
|
38
|
+
headers: {'User-Agent': 'Custom UA'} # Custom HTTP headers
|
39
|
+
})
|
40
|
+
```
|
41
|
+
|
42
|
+
### Accessing response status and headers
|
35
43
|
|
36
44
|
```ruby
|
37
45
|
page.response.status # 200
|
38
|
-
page.response.headers # { "server"=>"apache", "content-type"=>"text/html; charset=utf-8",
|
46
|
+
page.response.headers # { "server"=>"apache", "content-type"=>"text/html; charset=utf-8", ... }
|
47
|
+
page.status_code # 200
|
48
|
+
page.success? # true if the page was loaded successfully
|
49
|
+
page.error_message # returns the error message if any
|
39
50
|
```
|
40
51
|
|
41
|
-
|
42
|
-
|
43
|
-
You can see the data like this:
|
52
|
+
### Accessing page data
|
44
53
|
|
45
54
|
```ruby
|
46
|
-
page.url
|
47
|
-
page.scheme
|
48
|
-
page.host
|
49
|
-
page.port
|
50
|
-
page.title
|
51
|
-
page.description
|
52
|
-
page.links
|
53
|
-
page.images
|
54
|
-
page.meta
|
55
|
+
page.url # URL of the page
|
56
|
+
page.scheme # Scheme of the page (http, https)
|
57
|
+
page.host # Hostname of the page (like, example.com, without the scheme)
|
58
|
+
page.port # Port of the page
|
59
|
+
page.title # title of the page from the head section
|
60
|
+
page.description # description of the page
|
61
|
+
page.links # array of all links found on the page (absolute URLs)
|
62
|
+
page.images # array of all images found on the page (absolute URLs)
|
63
|
+
page.meta # meta tags of the page
|
64
|
+
page.favicon # favicon URL if available
|
55
65
|
```
|
56
66
|
|
57
|
-
|
67
|
+
### Working with meta tags
|
58
68
|
|
59
69
|
```ruby
|
60
|
-
page.meta #
|
70
|
+
page.meta # all meta tags
|
61
71
|
page.meta['description'] # meta description
|
62
|
-
page.meta['keywords']
|
72
|
+
page.meta['keywords'] # meta keywords
|
73
|
+
page.meta['og:title'] # OpenGraph title
|
74
|
+
```
|
75
|
+
|
76
|
+
### Filtering links and images by domain
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
page.domain_links('example.com') # returns only links pointing to example.com
|
80
|
+
page.domain_images('example.com') # returns only images hosted on example.com
|
81
|
+
```
|
82
|
+
|
83
|
+
### Searching for words
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
page.find(["ruby", "rails"]) # returns [{"ruby"=>3}, {"rails"=>1}]
|
87
|
+
```
|
88
|
+
|
89
|
+
### Export all data to JSON
|
90
|
+
|
91
|
+
```ruby
|
92
|
+
page.to_hash # returns a hash with all page data
|
63
93
|
```
|
64
94
|
|
65
95
|
## Contributors
|
66
96
|
|
67
97
|
* Steven Shelby ([@stevenshelby](https://github.com/stevenshelby))
|
68
|
-
|
98
|
+
* Sam Nissen ([@samnissen](https://github.com/samnissen))
|
69
99
|
|
70
100
|
## License
|
71
|
-
|
101
|
+
|
102
|
+
The WebInspector gem is released under the MIT License.
|
72
103
|
|
73
104
|
## Contributing
|
74
105
|
|
75
|
-
1. Fork it ( https://github.com/
|
106
|
+
1. Fork it ( https://github.com/davidesantangelo/webinspector/fork )
|
76
107
|
2. Create your feature branch (`git checkout -b my-new-feature`)
|
77
108
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
78
109
|
4. Push to the branch (`git push origin my-new-feature`)
|
79
110
|
5. Create a new Pull Request
|
80
|
-
>>>>>>> develop
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require
|
4
|
-
require
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'webinspector'
|
5
6
|
|
6
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
8
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +11,5 @@ require "webinspector"
|
|
10
11
|
# require "pry"
|
11
12
|
# Pry.start
|
12
13
|
|
13
|
-
require
|
14
|
+
require 'irb'
|
14
15
|
IRB.start
|
@@ -1,127 +1,236 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require File.expand_path(File.join(File.dirname(__FILE__), 'meta'))
|
2
4
|
|
3
5
|
module WebInspector
|
4
6
|
class Inspector
|
7
|
+
attr_reader :page, :url, :host, :meta
|
5
8
|
|
6
9
|
def initialize(page)
|
7
10
|
@page = page
|
8
11
|
@meta = WebInspector::Meta.new(page).meta
|
12
|
+
@base_url = nil
|
13
|
+
end
|
14
|
+
|
15
|
+
def set_url(url, host)
|
16
|
+
@url = url
|
17
|
+
@host = host
|
9
18
|
end
|
10
19
|
|
11
20
|
def title
|
12
|
-
@page.css('title').inner_text.strip
|
21
|
+
@page.css('title').inner_text.strip
|
22
|
+
rescue StandardError
|
23
|
+
nil
|
13
24
|
end
|
14
25
|
|
15
26
|
def description
|
16
|
-
@meta['description'] || snippet
|
27
|
+
@meta['description'] || @meta['og:description'] || snippet
|
17
28
|
end
|
18
29
|
|
19
30
|
def body
|
20
31
|
@page.css('body').to_html
|
21
32
|
end
|
22
33
|
|
23
|
-
|
24
|
-
|
34
|
+
# Search for specific words in the page content
|
35
|
+
# @param words [Array<String>] List of words to search for
|
36
|
+
# @return [Array<Hash>] Counts of word occurrences
|
37
|
+
def find(words)
|
38
|
+
text = @page.at('html').inner_text
|
39
|
+
counter(text.downcase, words)
|
25
40
|
end
|
26
41
|
|
42
|
+
# Get all links from the page
|
43
|
+
# @return [Array<String>] Array of URLs
|
27
44
|
def links
|
28
|
-
|
29
|
-
|
45
|
+
@links ||= begin
|
46
|
+
links = []
|
47
|
+
@page.css('a').each do |a|
|
48
|
+
href = a[:href]
|
49
|
+
next unless href
|
50
|
+
|
51
|
+
# Skip javascript and mailto links
|
52
|
+
next if href.start_with?('javascript:', 'mailto:', 'tel:')
|
53
|
+
|
54
|
+
# Clean and normalize URL
|
55
|
+
href = href.strip
|
56
|
+
|
57
|
+
begin
|
58
|
+
absolute_url = make_absolute_url(href)
|
59
|
+
links << absolute_url if absolute_url
|
60
|
+
rescue URI::InvalidURIError
|
61
|
+
# Skip invalid URLs
|
62
|
+
end
|
63
|
+
end
|
64
|
+
links.uniq
|
65
|
+
end
|
30
66
|
end
|
31
|
-
|
32
|
-
|
67
|
+
|
68
|
+
# Get links from a specific domain
|
69
|
+
# @param user_domain [String] Domain to filter links by
|
70
|
+
# @param host [String] Current host
|
71
|
+
# @return [Array<String>] Filtered links
|
72
|
+
def domain_links(user_domain, host = nil)
|
33
73
|
@host ||= host
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
74
|
+
|
75
|
+
return [] if links.empty?
|
76
|
+
|
77
|
+
# Handle nil user_domain
|
78
|
+
user_domain = @host.to_s if user_domain.nil? || user_domain.empty?
|
79
|
+
|
80
|
+
# Normalize domain for comparison
|
81
|
+
user_domain = user_domain.to_s.downcase.gsub(/\s+/, '')
|
82
|
+
user_domain = user_domain.sub(/^www\./, '') # Remove www prefix for comparison
|
83
|
+
|
84
|
+
links.select do |link|
|
85
|
+
uri = URI.parse(link.to_s)
|
86
|
+
next false unless uri.host # Skip URLs without hosts
|
87
|
+
|
88
|
+
uri_host = uri.host.to_s.downcase
|
89
|
+
uri_host = uri_host.sub(/^www\./, '') # Remove www prefix for comparison
|
90
|
+
uri_host.include?(user_domain)
|
91
|
+
rescue URI::InvalidURIError, NoMethodError
|
92
|
+
false
|
48
93
|
end
|
49
|
-
|
50
|
-
return domain_links.compact
|
51
94
|
end
|
52
|
-
|
53
|
-
|
95
|
+
|
96
|
+
# Get all images from the page
|
97
|
+
# @return [Array<String>] Array of image URLs
|
98
|
+
def images
|
99
|
+
@images ||= begin
|
100
|
+
images = []
|
101
|
+
@page.css('img').each do |img|
|
102
|
+
src = img[:src]
|
103
|
+
next unless src
|
104
|
+
|
105
|
+
# Clean and normalize URL
|
106
|
+
src = src.strip
|
107
|
+
|
108
|
+
begin
|
109
|
+
absolute_url = make_absolute_url(src)
|
110
|
+
images << absolute_url if absolute_url
|
111
|
+
rescue URI::InvalidURIError, URI::BadURIError
|
112
|
+
# Skip invalid URLs
|
113
|
+
end
|
114
|
+
end
|
115
|
+
images.uniq.compact
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
# Get images from a specific domain
|
120
|
+
# @param user_domain [String] Domain to filter images by
|
121
|
+
# @param host [String] Current host
|
122
|
+
# @return [Array<String>] Filtered images
|
123
|
+
def domain_images(user_domain, host = nil)
|
54
124
|
@host ||= host
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
125
|
+
|
126
|
+
return [] if images.empty?
|
127
|
+
|
128
|
+
# Handle nil user_domain
|
129
|
+
user_domain = @host.to_s if user_domain.nil? || user_domain.empty?
|
130
|
+
|
131
|
+
# Normalize domain for comparison
|
132
|
+
user_domain = user_domain.to_s.downcase.gsub(/\s+/, '')
|
133
|
+
user_domain = user_domain.sub(/^www\./, '') # Remove www prefix for comparison
|
134
|
+
|
135
|
+
images.select do |img|
|
136
|
+
uri = URI.parse(img.to_s)
|
137
|
+
next false unless uri.host # Skip URLs without hosts
|
138
|
+
|
139
|
+
uri_host = uri.host.to_s.downcase
|
140
|
+
uri_host = uri_host.sub(/^www\./, '') # Remove www prefix for comparison
|
141
|
+
uri_host.include?(user_domain)
|
142
|
+
rescue URI::InvalidURIError, NoMethodError
|
143
|
+
false
|
68
144
|
end
|
69
|
-
|
70
|
-
return domain_images.compact
|
71
145
|
end
|
72
|
-
|
73
|
-
|
146
|
+
|
147
|
+
private
|
148
|
+
|
149
|
+
# Count occurrences of words in text
|
150
|
+
# @param text [String] Text to search in
|
151
|
+
# @param words [Array<String>] Words to find
|
152
|
+
# @return [Array<Hash>] Count results
|
153
|
+
def counter(text, words)
|
154
|
+
words.map do |word|
|
155
|
+
{ word => text.scan(/#{word.downcase}/).size }
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
# Validate a URL domain
|
160
|
+
# @param u [String] URL to validate
|
161
|
+
# @return [PublicSuffix::Domain, false] Domain object or false if invalid
|
74
162
|
def validate_url_domain(u)
|
75
|
-
|
76
|
-
u =
|
77
|
-
|
78
|
-
|
163
|
+
u = u.to_s
|
164
|
+
u = '/' if u.empty?
|
165
|
+
|
79
166
|
begin
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
httpped_url
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
# Make sure the URL passes ICANN rules.
|
93
|
-
# The PublicSuffix object splits the domain and subdomain
|
94
|
-
# (unlike URI), which allows more liberal URL matching.
|
95
|
-
return PublicSuffix.parse(uri.host)
|
96
|
-
rescue URI::InvalidURIError, PublicSuffix::DomainInvalid => e
|
97
|
-
return false
|
167
|
+
domained_url = if !(u.split('/').first || '').match(/(:|\.)/)
|
168
|
+
@host + u
|
169
|
+
else
|
170
|
+
u
|
171
|
+
end
|
172
|
+
|
173
|
+
httpped_url = domained_url.start_with?('http') ? domained_url : "http://#{domained_url}"
|
174
|
+
uri = URI.parse(httpped_url)
|
175
|
+
|
176
|
+
PublicSuffix.parse(uri.host)
|
177
|
+
rescue URI::InvalidURIError, PublicSuffix::DomainInvalid
|
178
|
+
false
|
98
179
|
end
|
99
180
|
end
|
100
181
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
182
|
+
# Make a URL absolute
|
183
|
+
# @param url [String] URL to make absolute
|
184
|
+
# @return [String, nil] Absolute URL or nil if invalid
|
185
|
+
def make_absolute_url(url)
|
186
|
+
return nil if url.nil? || url.empty?
|
105
187
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
@
|
111
|
-
|
188
|
+
# If it's already absolute, return it
|
189
|
+
return url if url.start_with?('http://', 'https://')
|
190
|
+
|
191
|
+
# Get base URL from the page if not already set
|
192
|
+
if @base_url.nil?
|
193
|
+
base_tag = @page.at_css('base[href]')
|
194
|
+
@base_url = base_tag ? base_tag['href'] : nil
|
112
195
|
end
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
196
|
+
|
197
|
+
begin
|
198
|
+
# Try joining with base URL first if available
|
199
|
+
if @base_url && !@base_url.empty?
|
200
|
+
begin
|
201
|
+
return URI.join(@base_url, url).to_s
|
202
|
+
rescue URI::InvalidURIError, URI::BadURIError
|
203
|
+
# Fall through to next method
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
# If we have @url, try to use it
|
208
|
+
if @url
|
209
|
+
begin
|
210
|
+
return URI.join(@url, url).to_s
|
211
|
+
rescue URI::InvalidURIError, URI::BadURIError
|
212
|
+
# Fall through to next method
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
# Otherwise use a default http:// base if url is absolute path
|
217
|
+
return "http://#{@host}#{url}" if url.start_with?('/')
|
218
|
+
|
219
|
+
# For truly relative URLs with no base, we need to make our best guess
|
220
|
+
return "http://#{@host}/#{url}" if @host
|
221
|
+
|
222
|
+
# Last resort, return the original
|
223
|
+
url
|
224
|
+
rescue URI::InvalidURIError, URI::BadURIError
|
225
|
+
url # Return original instead of nil to be more lenient
|
119
226
|
end
|
120
227
|
end
|
121
228
|
|
229
|
+
# Extract a snippet from the first long paragraph
|
230
|
+
# @return [String] Text snippet
|
122
231
|
def snippet
|
123
232
|
first_long_paragraph = @page.search('//p[string-length() >= 120]').first
|
124
|
-
first_long_paragraph ? first_long_paragraph.text : ''
|
233
|
+
first_long_paragraph ? first_long_paragraph.text.strip[0..255] : ''
|
125
234
|
end
|
126
235
|
end
|
127
|
-
end
|
236
|
+
end
|
data/lib/web_inspector/meta.rb
CHANGED
@@ -1,15 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module WebInspector
|
2
4
|
class Meta
|
3
|
-
|
5
|
+
def initialize(page)
|
4
6
|
@page = page
|
5
7
|
end
|
6
8
|
|
7
9
|
def meta_tags
|
8
10
|
{
|
9
|
-
'name'
|
10
|
-
'http-equiv'
|
11
|
-
'property'
|
12
|
-
'charset'
|
11
|
+
'name' => meta_tags_by('name'),
|
12
|
+
'http-equiv' => meta_tags_by('http-equiv'),
|
13
|
+
'property' => meta_tags_by('property'),
|
14
|
+
'charset' => [charset_from_meta_charset],
|
15
|
+
'itemprop' => meta_tags_by('itemprop') # Add support for schema.org microdata
|
13
16
|
}
|
14
17
|
end
|
15
18
|
|
@@ -19,30 +22,48 @@ module WebInspector
|
|
19
22
|
|
20
23
|
def meta
|
21
24
|
meta_tag['name']
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
+
.merge(meta_tag['http-equiv'])
|
26
|
+
.merge(meta_tag['property'])
|
27
|
+
.merge(meta_tag['itemprop'] || {})
|
28
|
+
.merge('charset' => meta_tag['charset'])
|
25
29
|
end
|
26
30
|
|
27
31
|
def charset
|
28
|
-
@charset ||=
|
32
|
+
@charset ||= charset_from_meta_charset || charset_from_meta_content_type || charset_from_header || 'utf-8'
|
29
33
|
end
|
30
34
|
|
31
35
|
private
|
32
36
|
|
33
37
|
def charset_from_meta_charset
|
34
|
-
@page.css('meta[charset]')[0].attributes['charset'].value
|
38
|
+
@page.css('meta[charset]')[0].attributes['charset'].value
|
39
|
+
rescue StandardError
|
40
|
+
nil
|
35
41
|
end
|
36
42
|
|
37
43
|
def charset_from_meta_content_type
|
38
|
-
@page.css("meta[http-equiv='Content-Type']")[0].attributes['content'].value.split(';')[1].split('=')[1]
|
44
|
+
@page.css("meta[http-equiv='Content-Type']")[0].attributes['content'].value.split(';')[1].strip.split('=')[1]
|
45
|
+
rescue StandardError
|
46
|
+
nil
|
47
|
+
end
|
48
|
+
|
49
|
+
def charset_from_header
|
50
|
+
# Try to get charset from Content-Type header if available
|
51
|
+
nil
|
39
52
|
end
|
40
53
|
|
41
|
-
|
54
|
+
def meta_tags_by(attribute)
|
42
55
|
hash = {}
|
43
56
|
@page.css("meta[@#{attribute}]").map do |tag|
|
44
|
-
name
|
45
|
-
|
57
|
+
name = begin
|
58
|
+
tag.attributes[attribute].value.downcase
|
59
|
+
rescue StandardError
|
60
|
+
nil
|
61
|
+
end
|
62
|
+
content = begin
|
63
|
+
tag.attributes['content'].value
|
64
|
+
rescue StandardError
|
65
|
+
nil
|
66
|
+
end
|
46
67
|
|
47
68
|
if name && content
|
48
69
|
hash[name] ||= []
|
@@ -64,4 +85,4 @@ module WebInspector
|
|
64
85
|
end
|
65
86
|
end
|
66
87
|
end
|
67
|
-
end
|
88
|
+
end
|
data/lib/web_inspector/page.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'nokogiri'
|
2
4
|
require 'uri'
|
3
5
|
require 'open-uri'
|
@@ -5,129 +7,211 @@ require 'open_uri_redirections'
|
|
5
7
|
require 'faraday'
|
6
8
|
require 'public_suffix'
|
7
9
|
|
10
|
+
# Explicitly load Faraday::Retry if available
|
11
|
+
begin
|
12
|
+
require 'faraday/retry'
|
13
|
+
rescue LoadError
|
14
|
+
# Faraday retry is not available
|
15
|
+
end
|
16
|
+
|
8
17
|
require File.expand_path(File.join(File.dirname(__FILE__), 'inspector'))
|
9
18
|
require File.expand_path(File.join(File.dirname(__FILE__), 'request'))
|
10
19
|
|
11
20
|
module WebInspector
|
12
21
|
class Page
|
13
|
-
attr_reader :url, :scheme, :host, :port, :title, :description, :body, :meta, :links,
|
14
|
-
|
22
|
+
attr_reader :url, :scheme, :host, :port, :title, :description, :body, :meta, :links,
|
23
|
+
:domain_links, :domain_images, :images, :response, :status_code, :favicon
|
24
|
+
|
25
|
+
DEFAULT_TIMEOUT = 30
|
26
|
+
DEFAULT_RETRIES = 3
|
27
|
+
DEFAULT_USER_AGENT = -> { "WebInspector/#{WebInspector::VERSION} (+https://github.com/davidesantangelo/webinspector)" }
|
28
|
+
|
29
|
+
# Initialize a new WebInspector Page
|
30
|
+
#
|
31
|
+
# @param url [String] The URL to inspect
|
32
|
+
# @param options [Hash] Optional parameters
|
33
|
+
# @option options [Integer] :timeout Request timeout in seconds
|
34
|
+
# @option options [Integer] :retries Number of retries for failed requests
|
35
|
+
# @option options [Hash] :headers Custom HTTP headers
|
36
|
+
# @option options [Boolean] :allow_redirections Whether to follow redirects
|
37
|
+
# @option options [String] :user_agent Custom user agent
|
15
38
|
def initialize(url, options = {})
|
16
39
|
@url = url
|
17
40
|
@options = options
|
41
|
+
@retries = options[:retries] || DEFAULT_RETRIES
|
42
|
+
@timeout = options[:timeout] || DEFAULT_TIMEOUT
|
43
|
+
@headers = options[:headers] || { 'User-Agent' => options[:user_agent] || DEFAULT_USER_AGENT.call }
|
44
|
+
@allow_redirections = options[:allow_redirections].nil? || options[:allow_redirections]
|
45
|
+
|
18
46
|
@request = WebInspector::Request.new(url)
|
19
|
-
@inspector = WebInspector::Inspector.new(page)
|
20
|
-
end
|
21
47
|
|
22
|
-
|
23
|
-
|
48
|
+
begin
|
49
|
+
@inspector = WebInspector::Inspector.new(page)
|
50
|
+
@inspector.set_url(url, host)
|
51
|
+
@status_code = 200
|
52
|
+
rescue StandardError => e
|
53
|
+
@error = e
|
54
|
+
@status_code = e.respond_to?(:status_code) ? e.status_code : 500
|
55
|
+
end
|
24
56
|
end
|
25
57
|
|
26
|
-
|
27
|
-
|
58
|
+
# Check if the page was successfully loaded
|
59
|
+
#
|
60
|
+
# @return [Boolean] true if the page was loaded, false otherwise
|
61
|
+
def success?
|
62
|
+
!@inspector.nil? && !@error
|
28
63
|
end
|
29
64
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
@inspector.links
|
65
|
+
# Get the error message if any
|
66
|
+
#
|
67
|
+
# @return [String, nil] The error message or nil if no error
|
68
|
+
def error_message
|
69
|
+
@error&.message
|
36
70
|
end
|
37
71
|
|
38
|
-
|
39
|
-
|
40
|
-
|
72
|
+
# Delegate methods to inspector
|
73
|
+
%i[title description body links images meta].each do |method|
|
74
|
+
define_method(method) do
|
75
|
+
return nil unless success?
|
41
76
|
|
42
|
-
|
43
|
-
|
77
|
+
@inspector.send(method)
|
78
|
+
end
|
44
79
|
end
|
45
80
|
|
46
|
-
|
47
|
-
|
48
|
-
|
81
|
+
# Special case for find method that takes arguments
|
82
|
+
def find(words)
|
83
|
+
return nil unless success?
|
49
84
|
|
50
|
-
|
51
|
-
@request.host
|
85
|
+
@inspector.find(words)
|
52
86
|
end
|
53
87
|
|
54
|
-
|
55
|
-
|
88
|
+
# Delegate methods to request
|
89
|
+
%i[url host domain scheme port].each do |method|
|
90
|
+
define_method(method) do
|
91
|
+
@request.send(method)
|
92
|
+
end
|
56
93
|
end
|
57
94
|
|
58
|
-
|
59
|
-
|
60
|
-
|
95
|
+
# Get the favicon URL if available
|
96
|
+
#
|
97
|
+
# @return [String, nil] The favicon URL or nil if not found
|
98
|
+
def favicon
|
99
|
+
return @favicon if defined?(@favicon)
|
61
100
|
|
62
|
-
|
63
|
-
|
101
|
+
return nil unless success?
|
102
|
+
|
103
|
+
@favicon = begin
|
104
|
+
# Try multiple approaches to find favicon
|
105
|
+
|
106
|
+
# 1. Look for standard favicon link tags
|
107
|
+
favicon_link = @inspector.page.css("link[rel='shortcut icon'], link[rel='icon'], link[rel='apple-touch-icon']").first
|
108
|
+
if favicon_link && favicon_link['href']
|
109
|
+
begin
|
110
|
+
return URI.join(url, favicon_link['href']).to_s
|
111
|
+
rescue URI::InvalidURIError
|
112
|
+
# Try next method
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# 2. Try the default location /favicon.ico
|
117
|
+
"#{scheme}://#{host}/favicon.ico"
|
118
|
+
rescue StandardError
|
119
|
+
nil
|
120
|
+
end
|
64
121
|
end
|
65
122
|
|
66
123
|
def domain_links(u = domain)
|
124
|
+
return [] unless success?
|
125
|
+
|
67
126
|
@inspector.domain_links(u, host)
|
68
127
|
end
|
69
128
|
|
70
129
|
def domain_images(u = domain)
|
130
|
+
return [] unless success?
|
131
|
+
|
71
132
|
@inspector.domain_images(u, host)
|
72
133
|
end
|
73
134
|
|
135
|
+
# Get full JSON representation of the page
|
136
|
+
#
|
137
|
+
# @return [Hash] JSON representation of the page
|
74
138
|
def to_hash
|
75
139
|
{
|
76
|
-
'url'
|
77
|
-
'scheme'
|
78
|
-
'host'
|
79
|
-
'port'
|
80
|
-
'title'
|
81
|
-
'description'
|
82
|
-
'meta'
|
83
|
-
'links'
|
84
|
-
'images'
|
85
|
-
'
|
86
|
-
|
140
|
+
'url' => url,
|
141
|
+
'scheme' => scheme,
|
142
|
+
'host' => host,
|
143
|
+
'port' => port,
|
144
|
+
'title' => title,
|
145
|
+
'description' => description,
|
146
|
+
'meta' => meta,
|
147
|
+
'links' => links,
|
148
|
+
'images' => images,
|
149
|
+
'favicon' => favicon,
|
150
|
+
'response' => {
|
151
|
+
'status' => status_code,
|
152
|
+
'headers' => response&.headers || {},
|
153
|
+
'success' => success?
|
154
|
+
},
|
155
|
+
'error' => error_message
|
87
156
|
}
|
88
157
|
end
|
89
158
|
|
90
159
|
def response
|
91
160
|
@response ||= fetch
|
92
|
-
rescue
|
161
|
+
rescue StandardError => e
|
162
|
+
@error = e
|
93
163
|
nil
|
94
164
|
end
|
95
165
|
|
96
166
|
private
|
97
|
-
|
167
|
+
|
98
168
|
def fetch
|
99
|
-
session = Faraday.new(:
|
100
|
-
|
169
|
+
session = Faraday.new(url: url) do |faraday|
|
170
|
+
# Configure retries based on available middleware
|
171
|
+
faraday.request :retry, { max: @retries } if defined?(Faraday::Retry)
|
101
172
|
|
173
|
+
# Configure redirect handling
|
102
174
|
if @allow_redirections
|
103
|
-
|
104
|
-
|
175
|
+
begin
|
176
|
+
faraday.use FaradayMiddleware::FollowRedirects, limit: 10
|
177
|
+
faraday.use :cookie_jar
|
178
|
+
rescue NameError, NoMethodError
|
179
|
+
# Continue without middleware if not available
|
180
|
+
end
|
105
181
|
end
|
106
182
|
|
107
|
-
faraday.headers.merge!(@headers
|
183
|
+
faraday.headers.merge!(@headers)
|
108
184
|
faraday.adapter :net_http
|
109
185
|
end
|
110
186
|
|
111
|
-
|
112
|
-
|
113
|
-
req.options.open_timeout = @read_timeout
|
114
|
-
end
|
187
|
+
# Manual retry mechanism as a backup
|
188
|
+
retries = 0
|
115
189
|
|
116
|
-
|
190
|
+
begin
|
191
|
+
response = session.get do |req|
|
192
|
+
req.options.timeout = @timeout
|
193
|
+
req.options.open_timeout = @timeout
|
194
|
+
end
|
117
195
|
|
118
|
-
|
196
|
+
@url = response.env.url.to_s
|
197
|
+
response
|
198
|
+
rescue Faraday::TimeoutError, Faraday::ConnectionFailed => e
|
199
|
+
retries += 1
|
200
|
+
retry if retries <= @retries
|
201
|
+
raise e
|
202
|
+
end
|
119
203
|
end
|
120
204
|
|
121
205
|
def with_default_scheme(request)
|
122
|
-
request.url && request.scheme.nil? ?
|
123
|
-
end
|
124
|
-
|
125
|
-
def default_user_agent
|
126
|
-
"WebInspector/#{WebInspector::VERSION} (+https://github.com/davidesantangelo/webinspector)"
|
206
|
+
request.url && request.scheme.nil? ? "http://#{request.url}" : request.url
|
127
207
|
end
|
128
208
|
|
129
209
|
def page
|
130
|
-
|
210
|
+
# Use URI.open instead of open for Ruby 3.0+ compatibility
|
211
|
+
Nokogiri::HTML(URI.open(with_default_scheme(@request),
|
212
|
+
allow_redirections: :safe,
|
213
|
+
read_timeout: @timeout,
|
214
|
+
'User-Agent' => @headers['User-Agent']))
|
131
215
|
end
|
132
216
|
end
|
133
217
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'addressable/uri'
|
2
4
|
|
3
5
|
module WebInspector
|
@@ -13,7 +15,7 @@ module WebInspector
|
|
13
15
|
def host
|
14
16
|
uri.host
|
15
17
|
end
|
16
|
-
|
18
|
+
|
17
19
|
def domain
|
18
20
|
suffix_domain
|
19
21
|
end
|
@@ -24,23 +26,23 @@ module WebInspector
|
|
24
26
|
|
25
27
|
def port
|
26
28
|
URI(normalized_uri).port
|
27
|
-
end
|
29
|
+
end
|
28
30
|
|
29
31
|
private
|
30
|
-
|
32
|
+
|
31
33
|
def suffix_domain
|
32
34
|
return @domain if @domain
|
33
|
-
|
35
|
+
|
34
36
|
begin
|
35
37
|
@domain = PublicSuffix.parse(host).domain
|
36
|
-
rescue URI::InvalidURIError, PublicSuffix::DomainInvalid
|
38
|
+
rescue URI::InvalidURIError, PublicSuffix::DomainInvalid
|
37
39
|
@domain = ''
|
38
40
|
end
|
39
41
|
end
|
40
|
-
|
42
|
+
|
41
43
|
def uri
|
42
44
|
Addressable::URI.parse(@url)
|
43
|
-
rescue Addressable::URI::InvalidURIError
|
45
|
+
rescue Addressable::URI::InvalidURIError
|
44
46
|
nil
|
45
47
|
end
|
46
48
|
|
@@ -48,4 +50,4 @@ module WebInspector
|
|
48
50
|
uri.normalize.to_s
|
49
51
|
end
|
50
52
|
end
|
51
|
-
end
|
53
|
+
end
|
data/lib/web_inspector.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require File.expand_path(File.join(File.dirname(__FILE__), 'web_inspector/page'))
|
2
4
|
require File.expand_path(File.join(File.dirname(__FILE__), 'web_inspector/version'))
|
3
5
|
|
4
6
|
module WebInspector
|
5
|
-
|
7
|
+
module_function
|
6
8
|
|
7
9
|
def new(url, options = {})
|
8
10
|
Page.new(url, options)
|
9
11
|
end
|
10
|
-
end
|
12
|
+
end
|
data/lib/webinspector.rb
CHANGED
data/webinspector.gemspec
CHANGED
@@ -1,38 +1,45 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require File.expand_path('
|
5
|
+
require File.expand_path('lib/web_inspector/version', __dir__)
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
8
|
+
spec.name = 'webinspector'
|
8
9
|
spec.version = WebInspector::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
10
|
+
spec.authors = ['Davide Santangelo']
|
11
|
+
spec.email = ['davide.santangelo@gmail.com']
|
11
12
|
|
12
|
-
spec.summary =
|
13
|
-
spec.description =
|
14
|
-
spec.homepage =
|
15
|
-
spec.license =
|
13
|
+
spec.summary = 'Ruby gem to inspect completely a web page.'
|
14
|
+
spec.description = 'Ruby gem to inspect completely a web page. It scrapes a given URL, and returns you its meta, links, images and more.'
|
15
|
+
spec.homepage = 'https://github.com/davidesantangelo/webinspector'
|
16
|
+
spec.license = 'MIT'
|
16
17
|
|
17
18
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
18
|
-
spec.bindir =
|
19
|
+
spec.bindir = 'exe'
|
19
20
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
|
-
spec.require_paths = [
|
21
|
-
|
22
|
-
|
23
|
-
|
21
|
+
spec.require_paths = ['lib']
|
22
|
+
spec.metadata = {
|
23
|
+
'source_code_uri' => 'https://github.com/davidesantangelo/webinspector',
|
24
|
+
'bug_tracker_uri' => 'https://github.com/davidesantangelo/webinspector/issues'
|
25
|
+
}
|
24
26
|
|
25
|
-
spec.
|
26
|
-
spec.add_development_dependency "vcr"
|
27
|
-
spec.add_development_dependency "typhoeus"
|
27
|
+
spec.required_ruby_version = '>= 3.0.0'
|
28
28
|
|
29
|
-
spec.
|
29
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
30
|
+
spec.add_development_dependency 'rspec', '~> 3.12'
|
31
|
+
spec.add_development_dependency 'rubocop', '~> 1.50'
|
32
|
+
spec.add_development_dependency 'vcr', '~> 6.1'
|
33
|
+
spec.add_development_dependency 'webmock', '~> 3.18'
|
30
34
|
|
31
|
-
spec.add_dependency
|
32
|
-
spec.add_dependency
|
33
|
-
spec.add_dependency
|
34
|
-
spec.add_dependency
|
35
|
-
spec.add_dependency
|
36
|
-
spec.add_dependency
|
37
|
-
spec.add_dependency
|
35
|
+
spec.add_dependency 'addressable', '~> 2.8'
|
36
|
+
spec.add_dependency 'faraday', '~> 2.7'
|
37
|
+
spec.add_dependency 'faraday-cookie_jar', '~> 0.0.7'
|
38
|
+
spec.add_dependency 'faraday-follow_redirects', '~> 0.3'
|
39
|
+
spec.add_dependency 'faraday-retry', '~> 2.1'
|
40
|
+
spec.add_dependency 'json', '~> 2.6'
|
41
|
+
spec.add_dependency 'nokogiri', '~> 1.14'
|
42
|
+
spec.add_dependency 'open_uri_redirections', '~> 0.2'
|
43
|
+
spec.add_dependency 'openurl', '~> 1.0'
|
44
|
+
spec.add_dependency 'public_suffix', '~> 5.0'
|
38
45
|
end
|
metadata
CHANGED
@@ -1,183 +1,225 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webinspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Davide Santangelo
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '13.0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '13.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '3.12'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '3.12'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: rubocop
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '1.50'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '1.50'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: vcr
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '6.1'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
68
|
+
version: '6.1'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: webmock
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - "
|
73
|
+
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
75
|
+
version: '3.18'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
82
|
+
version: '3.18'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: addressable
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '2.8'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '2.8'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: faraday
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
86
100
|
requirements:
|
87
|
-
- - "
|
101
|
+
- - "~>"
|
88
102
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
103
|
+
version: '2.7'
|
90
104
|
type: :runtime
|
91
105
|
prerelease: false
|
92
106
|
version_requirements: !ruby/object:Gem::Requirement
|
93
107
|
requirements:
|
94
|
-
- - "
|
108
|
+
- - "~>"
|
95
109
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
110
|
+
version: '2.7'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
112
|
+
name: faraday-cookie_jar
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
100
114
|
requirements:
|
101
|
-
- - "
|
115
|
+
- - "~>"
|
102
116
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
117
|
+
version: 0.0.7
|
104
118
|
type: :runtime
|
105
119
|
prerelease: false
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
107
121
|
requirements:
|
108
|
-
- - "
|
122
|
+
- - "~>"
|
109
123
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
124
|
+
version: 0.0.7
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
126
|
+
name: faraday-follow_redirects
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
114
128
|
requirements:
|
115
|
-
- - "
|
129
|
+
- - "~>"
|
116
130
|
- !ruby/object:Gem::Version
|
117
|
-
version: '0'
|
131
|
+
version: '0.3'
|
118
132
|
type: :runtime
|
119
133
|
prerelease: false
|
120
134
|
version_requirements: !ruby/object:Gem::Requirement
|
121
135
|
requirements:
|
122
|
-
- - "
|
136
|
+
- - "~>"
|
123
137
|
- !ruby/object:Gem::Version
|
124
|
-
version: '0'
|
138
|
+
version: '0.3'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: faraday-retry
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '2.1'
|
146
|
+
type: :runtime
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '2.1'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: json
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '2.6'
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '2.6'
|
125
167
|
- !ruby/object:Gem::Dependency
|
126
168
|
name: nokogiri
|
127
169
|
requirement: !ruby/object:Gem::Requirement
|
128
170
|
requirements:
|
129
|
-
- - "
|
171
|
+
- - "~>"
|
130
172
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
173
|
+
version: '1.14'
|
132
174
|
type: :runtime
|
133
175
|
prerelease: false
|
134
176
|
version_requirements: !ruby/object:Gem::Requirement
|
135
177
|
requirements:
|
136
|
-
- - "
|
178
|
+
- - "~>"
|
137
179
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
180
|
+
version: '1.14'
|
139
181
|
- !ruby/object:Gem::Dependency
|
140
182
|
name: open_uri_redirections
|
141
183
|
requirement: !ruby/object:Gem::Requirement
|
142
184
|
requirements:
|
143
|
-
- - "
|
185
|
+
- - "~>"
|
144
186
|
- !ruby/object:Gem::Version
|
145
|
-
version: '0'
|
187
|
+
version: '0.2'
|
146
188
|
type: :runtime
|
147
189
|
prerelease: false
|
148
190
|
version_requirements: !ruby/object:Gem::Requirement
|
149
191
|
requirements:
|
150
|
-
- - "
|
192
|
+
- - "~>"
|
151
193
|
- !ruby/object:Gem::Version
|
152
|
-
version: '0'
|
194
|
+
version: '0.2'
|
153
195
|
- !ruby/object:Gem::Dependency
|
154
196
|
name: openurl
|
155
197
|
requirement: !ruby/object:Gem::Requirement
|
156
198
|
requirements:
|
157
|
-
- - "
|
199
|
+
- - "~>"
|
158
200
|
- !ruby/object:Gem::Version
|
159
|
-
version: '0'
|
201
|
+
version: '1.0'
|
160
202
|
type: :runtime
|
161
203
|
prerelease: false
|
162
204
|
version_requirements: !ruby/object:Gem::Requirement
|
163
205
|
requirements:
|
164
|
-
- - "
|
206
|
+
- - "~>"
|
165
207
|
- !ruby/object:Gem::Version
|
166
|
-
version: '0'
|
208
|
+
version: '1.0'
|
167
209
|
- !ruby/object:Gem::Dependency
|
168
210
|
name: public_suffix
|
169
211
|
requirement: !ruby/object:Gem::Requirement
|
170
212
|
requirements:
|
171
|
-
- - "
|
213
|
+
- - "~>"
|
172
214
|
- !ruby/object:Gem::Version
|
173
|
-
version: '0'
|
215
|
+
version: '5.0'
|
174
216
|
type: :runtime
|
175
217
|
prerelease: false
|
176
218
|
version_requirements: !ruby/object:Gem::Requirement
|
177
219
|
requirements:
|
178
|
-
- - "
|
220
|
+
- - "~>"
|
179
221
|
- !ruby/object:Gem::Version
|
180
|
-
version: '0'
|
222
|
+
version: '5.0'
|
181
223
|
description: Ruby gem to inspect completely a web page. It scrapes a given URL, and
|
182
224
|
returns you its meta, links, images and more.
|
183
225
|
email:
|
@@ -203,11 +245,13 @@ files:
|
|
203
245
|
- lib/web_inspector/version.rb
|
204
246
|
- lib/webinspector.rb
|
205
247
|
- webinspector.gemspec
|
206
|
-
homepage:
|
248
|
+
homepage: https://github.com/davidesantangelo/webinspector
|
207
249
|
licenses:
|
208
250
|
- MIT
|
209
|
-
metadata:
|
210
|
-
|
251
|
+
metadata:
|
252
|
+
source_code_uri: https://github.com/davidesantangelo/webinspector
|
253
|
+
bug_tracker_uri: https://github.com/davidesantangelo/webinspector/issues
|
254
|
+
post_install_message:
|
211
255
|
rdoc_options: []
|
212
256
|
require_paths:
|
213
257
|
- lib
|
@@ -215,16 +259,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
215
259
|
requirements:
|
216
260
|
- - ">="
|
217
261
|
- !ruby/object:Gem::Version
|
218
|
-
version:
|
262
|
+
version: 3.0.0
|
219
263
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
220
264
|
requirements:
|
221
265
|
- - ">="
|
222
266
|
- !ruby/object:Gem::Version
|
223
267
|
version: '0'
|
224
268
|
requirements: []
|
225
|
-
|
226
|
-
|
227
|
-
signing_key:
|
269
|
+
rubygems_version: 3.3.26
|
270
|
+
signing_key:
|
228
271
|
specification_version: 4
|
229
272
|
summary: Ruby gem to inspect completely a web page.
|
230
273
|
test_files: []
|