metainspector 5.6.0 → 5.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +3 -4
- data/CHANGELOG.md +45 -16
- data/README.md +12 -3
- data/lib/meta_inspector/document.rb +10 -3
- data/lib/meta_inspector/errors.rb +2 -0
- data/lib/meta_inspector/parser.rb +3 -2
- data/lib/meta_inspector/parsers/head_links.rb +21 -8
- data/lib/meta_inspector/parsers/images.rb +6 -4
- data/lib/meta_inspector/parsers/links.rb +2 -1
- data/lib/meta_inspector/parsers/texts.rb +28 -0
- data/lib/meta_inspector/request.rb +1 -1
- data/lib/meta_inspector/url.rb +7 -5
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +18 -18
- data/spec/document_spec.rb +9 -2
- data/spec/fixtures/feeds.response +23 -0
- data/spec/fixtures/guardian.co.uk.response +1 -1
- data/spec/fixtures/headings.response +23 -0
- data/spec/fixtures/relative_links_with_empty_base.response +22 -0
- data/spec/meta_inspector/head_links_spec.rb +4 -1
- data/spec/meta_inspector/images_spec.rb +6 -0
- data/spec/meta_inspector/links_spec.rb +35 -11
- data/spec/meta_inspector/texts_spec.rb +42 -0
- data/spec/spec_helper.rb +3 -2
- metadata +46 -47
- data/spec/fixtures/iteh.at.response +0 -971
- data/spec/fixtures/tea-tron.com.response +0 -957
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 97b670ec8a7026383d659037318206d8262e17bbc35b0ec51f34609b6a6ebc95
|
4
|
+
data.tar.gz: 326230360c0199174e39bf495e00de74057a208e155ca0f4ec584f9e728f59bd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1e89cc17ea97453f74935883267851f1a2f0e1a9255ea5a1a259a850950d9633227a41f4ed06af0b43a7e1f1a2331368b274c7a715ea6779e3ba60e616e11aa7
|
7
|
+
data.tar.gz: 656a52071ada09f4ac45703f1a688ec85f17f30b5e89f9f1965f81478c21ad3feb651df7773caf5a447c07bea6ee78cb84505f53f5ab5ac689a847ccbdb5ee15
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,48 +1,77 @@
|
|
1
1
|
# MetaInspector Changelog
|
2
2
|
|
3
|
+
## [Changes in 5.10](https://github.com/jaimeiniesta/metainspector/compare/v5.9.0...v5.10.0)
|
4
|
+
|
5
|
+
* Upgrade to Faraday 1.0.
|
6
|
+
|
7
|
+
## [Changes in 5.9](https://github.com/jaimeiniesta/metainspector/compare/v5.8.0...v5.9.0)
|
8
|
+
|
9
|
+
* Added #feeds method to retrieve all feeds of a page.
|
10
|
+
* Adds deprecation warning on #feed method.
|
11
|
+
|
12
|
+
## [Changes in 5.8](https://github.com/jaimeiniesta/metainspector/compare/v5.7.0...v5.8.0)
|
13
|
+
|
14
|
+
* Added h1..h6 support.
|
15
|
+
|
16
|
+
## [Changes in 5.7](https://github.com/jaimeiniesta/metainspector/compare/v5.6.0...v5.7.0)
|
17
|
+
|
18
|
+
* Avoids normalizing image URLs. https://github.com/jaimeiniesta/metainspector/pull/241
|
19
|
+
* Adds `NonHtmlError` instead of `ParserError` https://github.com/jaimeiniesta/metainspector/pull/248
|
20
|
+
|
21
|
+
## [Changes in 5.6](https://github.com/jaimeiniesta/metainspector/compare/v5.5.0...v5.6.0)
|
22
|
+
|
23
|
+
* New feature: `:encoding` option to force the encoding of a parsed document.
|
24
|
+
* Improvement: make `best_title` and `best_author` work by order of preference, rather than length.
|
25
|
+
|
26
|
+
## [Changes in 5.5](https://github.com/jaimeiniesta/metainspector/compare/v5.4.0...v5.5.0)
|
27
|
+
|
28
|
+
* New feature: adds `author`, `best_author`.
|
29
|
+
* Bugfix: adds presence validation for empty string on meta tag image values.
|
30
|
+
* Improves spider and links checker examples.
|
31
|
+
* Uses WebMock instead of FakeWeb in tests.
|
32
|
+
|
3
33
|
## [Changes in 5.4](https://github.com/jaimeiniesta/metainspector/compare/v5.3.0...v5.4.0)
|
4
34
|
|
5
|
-
Supports Gzipped responses.
|
6
|
-
Adds method `best_description` and makes `description` return just the meta description.
|
7
|
-
Removes support for Ruby 2.0.0 and adds support for 2.4.0.
|
35
|
+
* Supports Gzipped responses.
|
36
|
+
* Adds method `best_description` and makes `description` return just the meta description.
|
37
|
+
* Removes support for Ruby 2.0.0 and adds support for 2.4.0.
|
8
38
|
|
9
39
|
## [Changes in 5.3](https://github.com/jaimeiniesta/metainspector/compare/v5.2.0...v5.3.0)
|
10
40
|
|
11
|
-
Returns secondary description if meta description is empty.
|
12
|
-
Adds a custom timeout on top of the ones for Faraday, and sets defaults for timeouts.
|
13
|
-
Eliminates possible NULL char in HTML which breaks nokogiri.
|
41
|
+
* Returns secondary description if meta description is empty.
|
42
|
+
* Adds a custom timeout on top of the ones for Faraday, and sets defaults for timeouts.
|
43
|
+
* Eliminates possible NULL char in HTML which breaks nokogiri.
|
14
44
|
|
15
45
|
## [Changes in 5.2](https://github.com/jaimeiniesta/metainspector/compare/v5.1.0...v5.2.0)
|
16
46
|
|
17
|
-
Removes the deprecated `html_content_only` option, and replaces it by `allow_non_html_content`, by default `false`.
|
47
|
+
* Removes the deprecated `html_content_only` option, and replaces it by `allow_non_html_content`, by default `false`.
|
18
48
|
|
19
49
|
## [Changes in 5.1](https://github.com/jaimeiniesta/metainspector/compare/v5.0.0...v5.1.0)
|
20
50
|
|
21
|
-
Deprecates the `html_content_only` option, and turns it on by default.
|
51
|
+
* Deprecates the `html_content_only` option, and turns it on by default.
|
22
52
|
|
23
53
|
## [Changes in 5.0](https://github.com/jaimeiniesta/metainspector/compare/v4.7.1...v5.0.0)
|
24
54
|
|
25
|
-
Removes the ExceptionLog, all exceptions are now encapsulated in our own exception classes and
|
55
|
+
* Removes the ExceptionLog, all exceptions are now encapsulated in our own exception classes and
|
26
56
|
always raised.
|
27
57
|
|
28
58
|
## [Changes in 4.7](https://github.com/jaimeiniesta/metainspector/compare/v4.6.0...v4.7.1)
|
29
59
|
|
30
|
-
MetaInspector can be configured to use [Faraday::HttpCache](https://github.com/plataformatec/faraday-http-cache) to cache page responses. For that you should pass the `faraday_http_cache` option with at least the `:store` key, for example:
|
60
|
+
* MetaInspector can be configured to use [Faraday::HttpCache](https://github.com/plataformatec/faraday-http-cache) to cache page responses. For that you should pass the `faraday_http_cache` option with at least the `:store` key, for example:
|
31
61
|
|
32
62
|
```ruby
|
33
63
|
cache = ActiveSupport::Cache.lookup_store(:file_store, '/tmp/cache')
|
34
64
|
page = MetaInspector.new('http://example.com', faraday_http_cache: { store: cache })
|
35
65
|
```
|
36
66
|
|
37
|
-
Bugfixes:
|
38
|
-
|
39
|
-
* Parsing of the document is done as soon as it is initialized (just like we do with the request), so
|
67
|
+
* Bugfixes:
|
68
|
+
* Parsing of the document is done as soon as it is initialized (just like we do with the request), so
|
40
69
|
that parsing errors will be caught earlier.
|
41
|
-
* Rescues from Faraday::SSLError.
|
70
|
+
* Rescues from Faraday::SSLError.
|
42
71
|
|
43
72
|
## [Changes in 4.6](https://github.com/jaimeiniesta/metainspector/compare/v4.5.0...v4.6.0)
|
44
73
|
|
45
|
-
Faraday can be passed options via `:faraday_options`. This is useful in cases where we need to
|
74
|
+
* Faraday can be passed options via `:faraday_options`. This is useful in cases where we need to
|
46
75
|
customize the way we request the page, like for example disabling SSL verification, like this:
|
47
76
|
|
48
77
|
```ruby
|
@@ -70,7 +99,7 @@ MetaInpector.new('https://example.com', faraday_options: { ssl: { verify: false
|
|
70
99
|
|
71
100
|
## [Changes in 4.4](https://github.com/jaimeiniesta/metainspector/compare/v4.3.0...v4.4.0)
|
72
101
|
|
73
|
-
The default headers now include `'Accept-Encoding' => 'identity'` to minimize trouble with servers that respond with malformed compressed responses, [as explained here](https://github.com/lostisland/faraday/issues/337).
|
102
|
+
* The default headers now include `'Accept-Encoding' => 'identity'` to minimize trouble with servers that respond with malformed compressed responses, [as explained here](https://github.com/lostisland/faraday/issues/337).
|
74
103
|
|
75
104
|
## [Changes in 4.3](https://github.com/jaimeiniesta/metainspector/compare/v4.3.0...v4.4.0)
|
76
105
|
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# MetaInspector [](http://travis-ci.org/jaimeiniesta/metainspector) [](http://travis-ci.org/jaimeiniesta/metainspector) [](https://codeclimate.com/github/jaimeiniesta/metainspector)
|
2
2
|
|
3
3
|
MetaInspector is a gem for web scraping purposes.
|
4
4
|
|
@@ -22,6 +22,8 @@ If you're using it on a Rails application, just add it to your Gemfile and run `
|
|
22
22
|
gem 'metainspector'
|
23
23
|
```
|
24
24
|
|
25
|
+
Supported Ruby versions are defined in [`.travis.yml`](.travis.yml).
|
26
|
+
|
25
27
|
## Usage
|
26
28
|
|
27
29
|
Initialize a MetaInspector instance for an URL, like this:
|
@@ -73,7 +75,7 @@ page.root_url # Root url (scheme + host, like http://sitevalidator.co
|
|
73
75
|
page.head_links # an array of hashes of all head/links
|
74
76
|
page.stylesheets # an array of hashes of all head/links where rel='stylesheet'
|
75
77
|
page.canonicals # an array of hashes of all head/links where rel='canonical'
|
76
|
-
page.
|
78
|
+
page.feeds # Get RSS or Atom links in metadata fields as an array of hashes in the form { href: "...", title: "...", type: "..." }
|
77
79
|
```
|
78
80
|
|
79
81
|
### Texts
|
@@ -85,6 +87,12 @@ page.author # author of the page from the meta author tag
|
|
85
87
|
page.best_author # best author of the page, from a selection of candidates
|
86
88
|
page.description # returns the meta description
|
87
89
|
page.best_description # returns the first non-empty description between the following candidates: standard meta description, og:description, twitter:description, the first long paragraph
|
90
|
+
page.h1 # returns h1 text array
|
91
|
+
page.h2 # returns h2 text array
|
92
|
+
page.h3 # returns h3 text array
|
93
|
+
page.h4 # returns h4 text array
|
94
|
+
page.h5 # returns h5 text array
|
95
|
+
page.h6 # returns h6 text array
|
88
96
|
```
|
89
97
|
|
90
98
|
### Links
|
@@ -396,7 +404,8 @@ Web page scraping is tricky, you can expect to find different exceptions during
|
|
396
404
|
|
397
405
|
* `MetaInspector::TimeoutError`. When fetching a web page has taken too long.
|
398
406
|
* `MetaInspector::RequestError`. When there has been an error on the request phase. Examples: page not found, SSL failure, invalid URI.
|
399
|
-
* `MetaInspector::ParserError`. When there has been an error parsing the contents of the page.
|
407
|
+
* `MetaInspector::ParserError`. When there has been an error parsing the contents of the page.
|
408
|
+
* `MetaInspector::NonHtmlError`. When the contents of the page were not HTML. See also the `allow_non_html_content` option.
|
400
409
|
|
401
410
|
## Examples
|
402
411
|
|
@@ -48,8 +48,8 @@ module MetaInspector
|
|
48
48
|
delegate [:content_type, :response] => :@request
|
49
49
|
|
50
50
|
delegate [:parsed, :title, :best_title, :author, :best_author,
|
51
|
-
:description, :best_description, :links,
|
52
|
-
:images, :feed, :charset, :meta_tags,
|
51
|
+
:h1, :h2, :h3, :h4, :h5, :h6, :description, :best_description, :links,
|
52
|
+
:images, :feeds, :feed, :charset, :meta_tags,
|
53
53
|
:meta_tag, :meta, :favicon,
|
54
54
|
:head_links, :stylesheets, :canonicals] => :@parser
|
55
55
|
|
@@ -66,10 +66,17 @@ module MetaInspector
|
|
66
66
|
'best_author' => best_author,
|
67
67
|
'description' => description,
|
68
68
|
'best_description' => best_description,
|
69
|
+
'h1' => h1,
|
70
|
+
'h2' => h2,
|
71
|
+
'h3' => h3,
|
72
|
+
'h4' => h4,
|
73
|
+
'h5' => h5,
|
74
|
+
'h6' => h6,
|
69
75
|
'links' => links.to_hash,
|
70
76
|
'images' => images.to_a,
|
71
77
|
'charset' => charset,
|
72
78
|
'feed' => feed,
|
79
|
+
'feeds' => feeds,
|
73
80
|
'content_type' => content_type,
|
74
81
|
'meta_tags' => meta_tags,
|
75
82
|
'favicon' => images.favicon,
|
@@ -105,7 +112,7 @@ module MetaInspector
|
|
105
112
|
|
106
113
|
def document
|
107
114
|
@document ||= if !allow_non_html_content && !content_type.nil? && content_type != 'text/html'
|
108
|
-
fail MetaInspector::
|
115
|
+
fail MetaInspector::NonHtmlError.new "The url provided contains #{content_type} content instead of text/html content"
|
109
116
|
else
|
110
117
|
@request.read
|
111
118
|
end
|
@@ -23,10 +23,11 @@ module MetaInspector
|
|
23
23
|
extend Forwardable
|
24
24
|
delegate [:url, :scheme, :host] => :@document
|
25
25
|
delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
|
26
|
-
delegate [:head_links, :stylesheets, :canonicals, :feed]
|
26
|
+
delegate [:head_links, :stylesheets, :canonicals, :feeds, :feed] => :@head_links_parser
|
27
27
|
delegate [:links, :base_url] => :@links_parser
|
28
28
|
delegate :images => :@images_parser
|
29
|
-
delegate [:title, :best_title, :author, :best_author, :description, :best_description
|
29
|
+
delegate [:title, :best_title, :author, :best_author, :description, :best_description,
|
30
|
+
:h1, :h2, :h3, :h4, :h5, :h6] => :@texts_parser
|
30
31
|
|
31
32
|
# Returns the whole parsed document
|
32
33
|
def parsed
|
@@ -3,6 +3,10 @@ module MetaInspector
|
|
3
3
|
class HeadLinksParser < Base
|
4
4
|
delegate [:parsed, :base_url] => :@main_parser
|
5
5
|
|
6
|
+
KNOWN_FEED_TYPES = %w[
|
7
|
+
application/rss+xml application/atom+xml application/json
|
8
|
+
].freeze
|
9
|
+
|
6
10
|
def head_links
|
7
11
|
@head_links ||= parsed.css('head link').map do |tag|
|
8
12
|
Hash[
|
@@ -24,16 +28,25 @@ module MetaInspector
|
|
24
28
|
@canonicals ||= head_links.select { |hl| hl[:rel] == 'canonical' }
|
25
29
|
end
|
26
30
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
def feeds
|
32
|
+
@feeds ||=
|
33
|
+
parsed.search("//link[@rel='alternate']").map do |link|
|
34
|
+
next if !KNOWN_FEED_TYPES.include?(link["type"]) || link["href"].to_s.strip == ''
|
31
35
|
|
32
|
-
|
36
|
+
{
|
37
|
+
title: link["title"],
|
38
|
+
href: URL.absolutify(link["href"], base_url),
|
39
|
+
type: link["type"]
|
40
|
+
}
|
41
|
+
end.compact
|
42
|
+
end
|
33
43
|
|
34
|
-
def
|
35
|
-
|
36
|
-
feed
|
44
|
+
def feed
|
45
|
+
warn "DEPRECATION: Use MetaInspector#feeds instead of #feed. The former gives you all feeds and their metadata, the latter will be removed."
|
46
|
+
@feed ||= begin
|
47
|
+
first_feed = feeds.find { |l| /\/(rss|atom)\+xml$/i =~ l[:type] } || {}
|
48
|
+
first_feed[:href]
|
49
|
+
end
|
37
50
|
end
|
38
51
|
end
|
39
52
|
end
|
@@ -29,14 +29,16 @@ module MetaInspector
|
|
29
29
|
# If none found, tries with Twitter image
|
30
30
|
def owner_suggested
|
31
31
|
suggested_img = content_of(meta['og:image']) || content_of(meta['twitter:image'])
|
32
|
-
URL.absolutify(suggested_img, base_url) if suggested_img
|
32
|
+
URL.absolutify(suggested_img, base_url, normalize: false) if suggested_img
|
33
33
|
end
|
34
34
|
|
35
35
|
# Returns an array of [img_url, width, height] sorted by image area (width * height)
|
36
36
|
def with_size
|
37
37
|
@with_size ||= begin
|
38
38
|
img_nodes = parsed.search('//img').select{ |img_node| img_node['src'] }
|
39
|
-
imgs_with_size = img_nodes.map
|
39
|
+
imgs_with_size = img_nodes.map do |img_node|
|
40
|
+
[URL.absolutify(img_node['src'], base_url, normalize: false), img_node['width'], img_node['height']]
|
41
|
+
end
|
40
42
|
imgs_with_size.uniq! { |url, width, height| url }
|
41
43
|
if @download_images
|
42
44
|
imgs_with_size.map! do |url, width, height|
|
@@ -71,7 +73,7 @@ module MetaInspector
|
|
71
73
|
def favicon
|
72
74
|
query = '//link[@rel="icon" or contains(@rel, "shortcut")]'
|
73
75
|
value = parsed.xpath(query)[0].attributes['href'].value
|
74
|
-
@favicon ||= URL.absolutify(value, base_url)
|
76
|
+
@favicon ||= URL.absolutify(value, base_url, normalize: false)
|
75
77
|
rescue
|
76
78
|
nil
|
77
79
|
end
|
@@ -83,7 +85,7 @@ module MetaInspector
|
|
83
85
|
end
|
84
86
|
|
85
87
|
def absolutified_images
|
86
|
-
parsed_images.map { |i| URL.absolutify(i, base_url) }
|
88
|
+
parsed_images.map { |i| URL.absolutify(i, base_url, normalize: false) }
|
87
89
|
end
|
88
90
|
|
89
91
|
def parsed_images
|
@@ -47,7 +47,8 @@ module MetaInspector
|
|
47
47
|
# This can be the one set on a <base> tag,
|
48
48
|
# or the url of the document if no <base> tag was found.
|
49
49
|
def base_url
|
50
|
-
base_href
|
50
|
+
current_base_href = base_href.to_s.strip.empty? ? nil : base_href
|
51
|
+
current_base_href || url
|
51
52
|
end
|
52
53
|
|
53
54
|
# Returns the value of the href attribute on the <base /> tag, if exists
|
@@ -13,6 +13,30 @@ module MetaInspector
|
|
13
13
|
@best_title ||= find_best_title
|
14
14
|
end
|
15
15
|
|
16
|
+
def h1
|
17
|
+
@h1 ||= find_heading('h1')
|
18
|
+
end
|
19
|
+
|
20
|
+
def h2
|
21
|
+
@h2 ||= find_heading('h2')
|
22
|
+
end
|
23
|
+
|
24
|
+
def h3
|
25
|
+
@h3 ||= find_heading('h3')
|
26
|
+
end
|
27
|
+
|
28
|
+
def h4
|
29
|
+
@h4 ||= find_heading('h4')
|
30
|
+
end
|
31
|
+
|
32
|
+
def h5
|
33
|
+
@h5 ||= find_heading('h5')
|
34
|
+
end
|
35
|
+
|
36
|
+
def h6
|
37
|
+
@h6 ||= find_heading('h6')
|
38
|
+
end
|
39
|
+
|
16
40
|
# Returns the meta author, if present
|
17
41
|
def author
|
18
42
|
@author ||= meta['author']
|
@@ -45,6 +69,10 @@ module MetaInspector
|
|
45
69
|
|
46
70
|
private
|
47
71
|
|
72
|
+
def find_heading(heading)
|
73
|
+
parsed.css(heading).map { |tag| tag.inner_text.strip.gsub(/\s+/, ' ') }.reject(&:empty?)
|
74
|
+
end
|
75
|
+
|
48
76
|
# Look for candidates per list of priority
|
49
77
|
def find_best_title
|
50
78
|
candidates = [
|
@@ -48,7 +48,7 @@ module MetaInspector
|
|
48
48
|
@response ||= fetch
|
49
49
|
rescue Faraday::TimeoutError => e
|
50
50
|
raise MetaInspector::TimeoutError.new(e)
|
51
|
-
rescue Faraday::
|
51
|
+
rescue Faraday::ConnectionFailed, Faraday::SSLError, URI::InvalidURIError, FaradayMiddleware::RedirectLimitReached => e
|
52
52
|
raise MetaInspector::RequestError.new(e)
|
53
53
|
end
|
54
54
|
|
data/lib/meta_inspector/url.rb
CHANGED
@@ -5,7 +5,7 @@ module MetaInspector
|
|
5
5
|
attr_reader :url
|
6
6
|
|
7
7
|
def initialize(initial_url, options = {})
|
8
|
-
options = defaults.merge(options)
|
8
|
+
options = self.class.defaults.merge(options)
|
9
9
|
|
10
10
|
@normalize = options[:normalize]
|
11
11
|
|
@@ -56,11 +56,13 @@ module MetaInspector
|
|
56
56
|
# http:, ftp:, telnet:, mailto:, javascript: ...
|
57
57
|
# Protocol-relative URLs are also resolved to use the same
|
58
58
|
# schema as the base_url
|
59
|
-
def self.absolutify(url, base_url)
|
59
|
+
def self.absolutify(url, base_url, options = {})
|
60
|
+
options = defaults.merge(options)
|
60
61
|
if url =~ /^\w*\:/i
|
61
|
-
MetaInspector::URL.new(url).url
|
62
|
+
MetaInspector::URL.new(url, options).url
|
62
63
|
else
|
63
|
-
Addressable::URI.join(base_url, url)
|
64
|
+
uri = Addressable::URI.join(base_url, url)
|
65
|
+
options[:normalize] ? uri.normalize.to_s : uri.to_s
|
64
66
|
end
|
65
67
|
rescue MetaInspector::ParserError, Addressable::URI::InvalidURIError, ArgumentError
|
66
68
|
nil
|
@@ -68,7 +70,7 @@ module MetaInspector
|
|
68
70
|
|
69
71
|
private
|
70
72
|
|
71
|
-
def defaults
|
73
|
+
def self.defaults
|
72
74
|
{ :normalize => true }
|
73
75
|
end
|
74
76
|
|
data/meta_inspector.gemspec
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
require File.expand_path('../lib/meta_inspector/version', __FILE__)
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
|
-
gem.
|
5
|
-
gem.email =
|
4
|
+
gem.author = "Jaime Iniesta"
|
5
|
+
gem.email = "jaimeiniesta@gmail.com"
|
6
6
|
gem.description = %q{MetaInspector lets you scrape a web page and get its links, images, texts, meta tags...}
|
7
7
|
gem.summary = %q{MetaInspector is a ruby gem for web scraping purposes, that returns metadata from a given URL}
|
8
|
-
gem.homepage = "https://github.com/
|
8
|
+
gem.homepage = "https://github.com/metainspector/metainspector"
|
9
9
|
gem.license = "MIT"
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split("\n")
|
@@ -14,20 +14,20 @@ Gem::Specification.new do |gem|
|
|
14
14
|
gem.require_paths = ["lib"]
|
15
15
|
gem.version = MetaInspector::VERSION
|
16
16
|
|
17
|
-
gem.add_dependency 'nokogiri', '~> 1.
|
18
|
-
gem.add_dependency 'faraday', '~> 0.
|
19
|
-
gem.add_dependency 'faraday_middleware', '~> 0.
|
20
|
-
gem.add_dependency 'faraday-cookie_jar', '~> 0.0'
|
21
|
-
gem.add_dependency 'faraday-http-cache', '~> 2.0'
|
22
|
-
gem.add_dependency 'faraday-encoding', '~> 0.0'
|
23
|
-
gem.add_dependency 'addressable', '~> 2.
|
24
|
-
gem.add_dependency 'fastimage', '~> 2.1'
|
25
|
-
gem.add_dependency 'nesty', '~> 1.0'
|
17
|
+
gem.add_dependency 'nokogiri', '~> 1.10.9'
|
18
|
+
gem.add_dependency 'faraday', '~> 1.0.0'
|
19
|
+
gem.add_dependency 'faraday_middleware', '~> 1.0.0'
|
20
|
+
gem.add_dependency 'faraday-cookie_jar', '~> 0.0.6'
|
21
|
+
gem.add_dependency 'faraday-http-cache', '~> 2.2.0'
|
22
|
+
gem.add_dependency 'faraday-encoding', '~> 0.0.5'
|
23
|
+
gem.add_dependency 'addressable', '~> 2.7.0'
|
24
|
+
gem.add_dependency 'fastimage', '~> 2.1.7'
|
25
|
+
gem.add_dependency 'nesty', '~> 1.0.2'
|
26
26
|
|
27
|
-
gem.add_development_dependency 'rspec', '~> 3.0'
|
28
|
-
gem.add_development_dependency 'webmock'
|
29
|
-
gem.add_development_dependency 'awesome_print'
|
30
|
-
gem.add_development_dependency 'rake', '~>
|
31
|
-
gem.add_development_dependency 'pry'
|
32
|
-
gem.add_development_dependency 'rubocop'
|
27
|
+
gem.add_development_dependency 'rspec', '~> 3.9.0'
|
28
|
+
gem.add_development_dependency 'webmock', '~> 3.8.3'
|
29
|
+
gem.add_development_dependency 'awesome_print', '~> 1.8.0'
|
30
|
+
gem.add_development_dependency 'rake', '~> 13.0.1'
|
31
|
+
gem.add_development_dependency 'pry', '~> 0.13.1'
|
32
|
+
gem.add_development_dependency 'rubocop', '~> 0.82.0'
|
33
33
|
end
|