metainspector 5.6.0 → 5.10.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +3 -4
- data/CHANGELOG.md +45 -16
- data/README.md +12 -3
- data/lib/meta_inspector/document.rb +10 -3
- data/lib/meta_inspector/errors.rb +2 -0
- data/lib/meta_inspector/parser.rb +3 -2
- data/lib/meta_inspector/parsers/head_links.rb +21 -8
- data/lib/meta_inspector/parsers/images.rb +6 -4
- data/lib/meta_inspector/parsers/links.rb +2 -1
- data/lib/meta_inspector/parsers/texts.rb +28 -0
- data/lib/meta_inspector/request.rb +1 -1
- data/lib/meta_inspector/url.rb +7 -5
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +18 -18
- data/spec/document_spec.rb +9 -2
- data/spec/fixtures/feeds.response +23 -0
- data/spec/fixtures/guardian.co.uk.response +1 -1
- data/spec/fixtures/headings.response +23 -0
- data/spec/fixtures/relative_links_with_empty_base.response +22 -0
- data/spec/meta_inspector/head_links_spec.rb +4 -1
- data/spec/meta_inspector/images_spec.rb +6 -0
- data/spec/meta_inspector/links_spec.rb +35 -11
- data/spec/meta_inspector/texts_spec.rb +42 -0
- data/spec/spec_helper.rb +3 -2
- metadata +46 -47
- data/spec/fixtures/iteh.at.response +0 -971
- data/spec/fixtures/tea-tron.com.response +0 -957
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 97b670ec8a7026383d659037318206d8262e17bbc35b0ec51f34609b6a6ebc95
|
4
|
+
data.tar.gz: 326230360c0199174e39bf495e00de74057a208e155ca0f4ec584f9e728f59bd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1e89cc17ea97453f74935883267851f1a2f0e1a9255ea5a1a259a850950d9633227a41f4ed06af0b43a7e1f1a2331368b274c7a715ea6779e3ba60e616e11aa7
|
7
|
+
data.tar.gz: 656a52071ada09f4ac45703f1a688ec85f17f30b5e89f9f1965f81478c21ad3feb651df7773caf5a447c07bea6ee78cb84505f53f5ab5ac689a847ccbdb5ee15
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,48 +1,77 @@
|
|
1
1
|
# MetaInspector Changelog
|
2
2
|
|
3
|
+
## [Changes in 5.10](https://github.com/jaimeiniesta/metainspector/compare/v5.9.0...v5.10.0)
|
4
|
+
|
5
|
+
* Upgrade to Faraday 1.0.
|
6
|
+
|
7
|
+
## [Changes in 5.9](https://github.com/jaimeiniesta/metainspector/compare/v5.8.0...v5.9.0)
|
8
|
+
|
9
|
+
* Added #feeds method to retrieve all feeds of a page.
|
10
|
+
* Adds deprecation warning on #feed method.
|
11
|
+
|
12
|
+
## [Changes in 5.8](https://github.com/jaimeiniesta/metainspector/compare/v5.7.0...v5.8.0)
|
13
|
+
|
14
|
+
* Added h1..h6 support.
|
15
|
+
|
16
|
+
## [Changes in 5.7](https://github.com/jaimeiniesta/metainspector/compare/v5.6.0...v5.7.0)
|
17
|
+
|
18
|
+
* Avoids normalizing image URLs. https://github.com/jaimeiniesta/metainspector/pull/241
|
19
|
+
* Adds `NonHtmlError` instead of `ParserError` https://github.com/jaimeiniesta/metainspector/pull/248
|
20
|
+
|
21
|
+
## [Changes in 5.6](https://github.com/jaimeiniesta/metainspector/compare/v5.5.0...v5.6.0)
|
22
|
+
|
23
|
+
* New feature: `:encoding` option to force the encoding of a parsed document.
|
24
|
+
* Improvement: make `best_title` and `best_author` work by order of preference, rather than length.
|
25
|
+
|
26
|
+
## [Changes in 5.5](https://github.com/jaimeiniesta/metainspector/compare/v5.4.0...v5.5.0)
|
27
|
+
|
28
|
+
* New feature: adds `author`, `best_author`.
|
29
|
+
* Bugfix: adds presence validation for empty string on meta tag image values.
|
30
|
+
* Improves spider and links checker examples.
|
31
|
+
* Uses WebMock instead of FakeWeb in tests.
|
32
|
+
|
3
33
|
## [Changes in 5.4](https://github.com/jaimeiniesta/metainspector/compare/v5.3.0...v5.4.0)
|
4
34
|
|
5
|
-
Supports Gzipped responses.
|
6
|
-
Adds method `best_description` and makes `description` return just the meta description.
|
7
|
-
Removes support for Ruby 2.0.0 and adds support for 2.4.0.
|
35
|
+
* Supports Gzipped responses.
|
36
|
+
* Adds method `best_description` and makes `description` return just the meta description.
|
37
|
+
* Removes support for Ruby 2.0.0 and adds support for 2.4.0.
|
8
38
|
|
9
39
|
## [Changes in 5.3](https://github.com/jaimeiniesta/metainspector/compare/v5.2.0...v5.3.0)
|
10
40
|
|
11
|
-
Returns secondary description if meta description is empty.
|
12
|
-
Adds a custom timeout on top of the ones for Faraday, and sets defaults for timeouts.
|
13
|
-
Eliminates possible NULL char in HTML which breaks nokogiri.
|
41
|
+
* Returns secondary description if meta description is empty.
|
42
|
+
* Adds a custom timeout on top of the ones for Faraday, and sets defaults for timeouts.
|
43
|
+
* Eliminates possible NULL char in HTML which breaks nokogiri.
|
14
44
|
|
15
45
|
## [Changes in 5.2](https://github.com/jaimeiniesta/metainspector/compare/v5.1.0...v5.2.0)
|
16
46
|
|
17
|
-
Removes the deprecated `html_content_only` option, and replaces it by `allow_non_html_content`, by default `false`.
|
47
|
+
* Removes the deprecated `html_content_only` option, and replaces it by `allow_non_html_content`, by default `false`.
|
18
48
|
|
19
49
|
## [Changes in 5.1](https://github.com/jaimeiniesta/metainspector/compare/v5.0.0...v5.1.0)
|
20
50
|
|
21
|
-
Deprecates the `html_content_only` option, and turns it on by default.
|
51
|
+
* Deprecates the `html_content_only` option, and turns it on by default.
|
22
52
|
|
23
53
|
## [Changes in 5.0](https://github.com/jaimeiniesta/metainspector/compare/v4.7.1...v5.0.0)
|
24
54
|
|
25
|
-
Removes the ExceptionLog, all exceptions are now encapsulated in our own exception classes and
|
55
|
+
* Removes the ExceptionLog, all exceptions are now encapsulated in our own exception classes and
|
26
56
|
always raised.
|
27
57
|
|
28
58
|
## [Changes in 4.7](https://github.com/jaimeiniesta/metainspector/compare/v4.6.0...v4.7.1)
|
29
59
|
|
30
|
-
MetaInspector can be configured to use [Faraday::HttpCache](https://github.com/plataformatec/faraday-http-cache) to cache page responses. For that you should pass the `faraday_http_cache` option with at least the `:store` key, for example:
|
60
|
+
* MetaInspector can be configured to use [Faraday::HttpCache](https://github.com/plataformatec/faraday-http-cache) to cache page responses. For that you should pass the `faraday_http_cache` option with at least the `:store` key, for example:
|
31
61
|
|
32
62
|
```ruby
|
33
63
|
cache = ActiveSupport::Cache.lookup_store(:file_store, '/tmp/cache')
|
34
64
|
page = MetaInspector.new('http://example.com', faraday_http_cache: { store: cache })
|
35
65
|
```
|
36
66
|
|
37
|
-
Bugfixes:
|
38
|
-
|
39
|
-
* Parsing of the document is done as soon as it is initialized (just like we do with the request), so
|
67
|
+
* Bugfixes:
|
68
|
+
* Parsing of the document is done as soon as it is initialized (just like we do with the request), so
|
40
69
|
that parsing errors will be caught earlier.
|
41
|
-
* Rescues from Faraday::SSLError.
|
70
|
+
* Rescues from Faraday::SSLError.
|
42
71
|
|
43
72
|
## [Changes in 4.6](https://github.com/jaimeiniesta/metainspector/compare/v4.5.0...v4.6.0)
|
44
73
|
|
45
|
-
Faraday can be passed options via `:faraday_options`. This is useful in cases where we need to
|
74
|
+
* Faraday can be passed options via `:faraday_options`. This is useful in cases where we need to
|
46
75
|
customize the way we request the page, like for example disabling SSL verification, like this:
|
47
76
|
|
48
77
|
```ruby
|
@@ -70,7 +99,7 @@ MetaInpector.new('https://example.com', faraday_options: { ssl: { verify: false
|
|
70
99
|
|
71
100
|
## [Changes in 4.4](https://github.com/jaimeiniesta/metainspector/compare/v4.3.0...v4.4.0)
|
72
101
|
|
73
|
-
The default headers now include `'Accept-Encoding' => 'identity'` to minimize trouble with servers that respond with malformed compressed responses, [as explained here](https://github.com/lostisland/faraday/issues/337).
|
102
|
+
* The default headers now include `'Accept-Encoding' => 'identity'` to minimize trouble with servers that respond with malformed compressed responses, [as explained here](https://github.com/lostisland/faraday/issues/337).
|
74
103
|
|
75
104
|
## [Changes in 4.3](https://github.com/jaimeiniesta/metainspector/compare/v4.2.0...v4.3.0)
|
76
105
|
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# MetaInspector [![Build Status](https://secure.travis-ci.org/jaimeiniesta/metainspector.png)](http://travis-ci.org/jaimeiniesta/metainspector) [![
|
1
|
+
# MetaInspector [![Build Status](https://secure.travis-ci.org/jaimeiniesta/metainspector.png)](http://travis-ci.org/jaimeiniesta/metainspector) [![Code Climate](https://codeclimate.com/github/jaimeiniesta/metainspector/badges/gpa.svg)](https://codeclimate.com/github/jaimeiniesta/metainspector)
|
2
2
|
|
3
3
|
MetaInspector is a gem for web scraping purposes.
|
4
4
|
|
@@ -22,6 +22,8 @@ If you're using it on a Rails application, just add it to your Gemfile and run `
|
|
22
22
|
gem 'metainspector'
|
23
23
|
```
|
24
24
|
|
25
|
+
Supported Ruby versions are defined in [`.travis.yml`](.travis.yml).
|
26
|
+
|
25
27
|
## Usage
|
26
28
|
|
27
29
|
Initialize a MetaInspector instance for a URL, like this:
|
@@ -73,7 +75,7 @@ page.root_url # Root url (scheme + host, like http://sitevalidator.co
|
|
73
75
|
page.head_links # an array of hashes of all head/links
|
74
76
|
page.stylesheets # an array of hashes of all head/links where rel='stylesheet'
|
75
77
|
page.canonicals # an array of hashes of all head/links where rel='canonical'
|
76
|
-
page.
|
78
|
+
page.feeds # Get RSS or Atom links in meta data fields as an array of hashes in the form { href: "...", title: "...", type: "..." }
|
77
79
|
```
|
78
80
|
|
79
81
|
### Texts
|
@@ -85,6 +87,12 @@ page.author # author of the page from the meta author tag
|
|
85
87
|
page.best_author # best author of the page, from a selection of candidates
|
86
88
|
page.description # returns the meta description
|
87
89
|
page.best_description # returns the first non-empty description between the following candidates: standard meta description, og:description, twitter:description, the first long paragraph
|
90
|
+
page.h1 # returns h1 text array
|
91
|
+
page.h2 # returns h2 text array
|
92
|
+
page.h3 # returns h3 text array
|
93
|
+
page.h4 # returns h4 text array
|
94
|
+
page.h5 # returns h5 text array
|
95
|
+
page.h6 # returns h6 text array
|
88
96
|
```
|
89
97
|
|
90
98
|
### Links
|
@@ -396,7 +404,8 @@ Web page scraping is tricky, you can expect to find different exceptions during
|
|
396
404
|
|
397
405
|
* `MetaInspector::TimeoutError`. When fetching a web page has taken too long.
|
398
406
|
* `MetaInspector::RequestError`. When there has been an error on the request phase. Examples: page not found, SSL failure, invalid URI.
|
399
|
-
* `MetaInspector::ParserError`. When there has been an error parsing the contents of the page.
|
407
|
+
* `MetaInspector::ParserError`. When there has been an error parsing the contents of the page.
|
408
|
+
* `MetaInspector::NonHtmlError`. When the contents of the page were not HTML. See also the `allow_non_html_content` option.
|
400
409
|
|
401
410
|
## Examples
|
402
411
|
|
@@ -48,8 +48,8 @@ module MetaInspector
|
|
48
48
|
delegate [:content_type, :response] => :@request
|
49
49
|
|
50
50
|
delegate [:parsed, :title, :best_title, :author, :best_author,
|
51
|
-
:description, :best_description, :links,
|
52
|
-
:images, :feed, :charset, :meta_tags,
|
51
|
+
:h1, :h2, :h3, :h4, :h5, :h6, :description, :best_description, :links,
|
52
|
+
:images, :feeds, :feed, :charset, :meta_tags,
|
53
53
|
:meta_tag, :meta, :favicon,
|
54
54
|
:head_links, :stylesheets, :canonicals] => :@parser
|
55
55
|
|
@@ -66,10 +66,17 @@ module MetaInspector
|
|
66
66
|
'best_author' => best_author,
|
67
67
|
'description' => description,
|
68
68
|
'best_description' => best_description,
|
69
|
+
'h1' => h1,
|
70
|
+
'h2' => h2,
|
71
|
+
'h3' => h3,
|
72
|
+
'h4' => h4,
|
73
|
+
'h5' => h5,
|
74
|
+
'h6' => h6,
|
69
75
|
'links' => links.to_hash,
|
70
76
|
'images' => images.to_a,
|
71
77
|
'charset' => charset,
|
72
78
|
'feed' => feed,
|
79
|
+
'feeds' => feeds,
|
73
80
|
'content_type' => content_type,
|
74
81
|
'meta_tags' => meta_tags,
|
75
82
|
'favicon' => images.favicon,
|
@@ -105,7 +112,7 @@ module MetaInspector
|
|
105
112
|
|
106
113
|
def document
|
107
114
|
@document ||= if !allow_non_html_content && !content_type.nil? && content_type != 'text/html'
|
108
|
-
fail MetaInspector::
|
115
|
+
fail MetaInspector::NonHtmlError.new "The url provided contains #{content_type} content instead of text/html content"
|
109
116
|
else
|
110
117
|
@request.read
|
111
118
|
end
|
@@ -23,10 +23,11 @@ module MetaInspector
|
|
23
23
|
extend Forwardable
|
24
24
|
delegate [:url, :scheme, :host] => :@document
|
25
25
|
delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
|
26
|
-
delegate [:head_links, :stylesheets, :canonicals, :feed]
|
26
|
+
delegate [:head_links, :stylesheets, :canonicals, :feeds, :feed] => :@head_links_parser
|
27
27
|
delegate [:links, :base_url] => :@links_parser
|
28
28
|
delegate :images => :@images_parser
|
29
|
-
delegate [:title, :best_title, :author, :best_author, :description, :best_description
|
29
|
+
delegate [:title, :best_title, :author, :best_author, :description, :best_description,
|
30
|
+
:h1, :h2, :h3, :h4, :h5, :h6] => :@texts_parser
|
30
31
|
|
31
32
|
# Returns the whole parsed document
|
32
33
|
def parsed
|
@@ -3,6 +3,10 @@ module MetaInspector
|
|
3
3
|
class HeadLinksParser < Base
|
4
4
|
delegate [:parsed, :base_url] => :@main_parser
|
5
5
|
|
6
|
+
KNOWN_FEED_TYPES = %w[
|
7
|
+
application/rss+xml application/atom+xml application/json
|
8
|
+
].freeze
|
9
|
+
|
6
10
|
def head_links
|
7
11
|
@head_links ||= parsed.css('head link').map do |tag|
|
8
12
|
Hash[
|
@@ -24,16 +28,25 @@ module MetaInspector
|
|
24
28
|
@canonicals ||= head_links.select { |hl| hl[:rel] == 'canonical' }
|
25
29
|
end
|
26
30
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
def feeds
|
32
|
+
@feeds ||=
|
33
|
+
parsed.search("//link[@rel='alternate']").map do |link|
|
34
|
+
next if !KNOWN_FEED_TYPES.include?(link["type"]) || link["href"].to_s.strip == ''
|
31
35
|
|
32
|
-
|
36
|
+
{
|
37
|
+
title: link["title"],
|
38
|
+
href: URL.absolutify(link["href"], base_url),
|
39
|
+
type: link["type"]
|
40
|
+
}
|
41
|
+
end.compact
|
42
|
+
end
|
33
43
|
|
34
|
-
def
|
35
|
-
|
36
|
-
feed
|
44
|
+
def feed
|
45
|
+
warn "DEPRECATION: Use MetaInspector#feeds instead of #feed. The former gives you all feeds and their metadata, the latter will be removed."
|
46
|
+
@feed ||= begin
|
47
|
+
first_feed = feeds.find { |l| /\/(rss|atom)\+xml$/i =~ l[:type] } || {}
|
48
|
+
first_feed[:href]
|
49
|
+
end
|
37
50
|
end
|
38
51
|
end
|
39
52
|
end
|
@@ -29,14 +29,16 @@ module MetaInspector
|
|
29
29
|
# If none found, tries with Twitter image
|
30
30
|
def owner_suggested
|
31
31
|
suggested_img = content_of(meta['og:image']) || content_of(meta['twitter:image'])
|
32
|
-
URL.absolutify(suggested_img, base_url) if suggested_img
|
32
|
+
URL.absolutify(suggested_img, base_url, normalize: false) if suggested_img
|
33
33
|
end
|
34
34
|
|
35
35
|
# Returns an array of [img_url, width, height] sorted by image area (width * height)
|
36
36
|
def with_size
|
37
37
|
@with_size ||= begin
|
38
38
|
img_nodes = parsed.search('//img').select{ |img_node| img_node['src'] }
|
39
|
-
imgs_with_size = img_nodes.map
|
39
|
+
imgs_with_size = img_nodes.map do |img_node|
|
40
|
+
[URL.absolutify(img_node['src'], base_url, normalize: false), img_node['width'], img_node['height']]
|
41
|
+
end
|
40
42
|
imgs_with_size.uniq! { |url, width, height| url }
|
41
43
|
if @download_images
|
42
44
|
imgs_with_size.map! do |url, width, height|
|
@@ -71,7 +73,7 @@ module MetaInspector
|
|
71
73
|
def favicon
|
72
74
|
query = '//link[@rel="icon" or contains(@rel, "shortcut")]'
|
73
75
|
value = parsed.xpath(query)[0].attributes['href'].value
|
74
|
-
@favicon ||= URL.absolutify(value, base_url)
|
76
|
+
@favicon ||= URL.absolutify(value, base_url, normalize: false)
|
75
77
|
rescue
|
76
78
|
nil
|
77
79
|
end
|
@@ -83,7 +85,7 @@ module MetaInspector
|
|
83
85
|
end
|
84
86
|
|
85
87
|
def absolutified_images
|
86
|
-
parsed_images.map { |i| URL.absolutify(i, base_url) }
|
88
|
+
parsed_images.map { |i| URL.absolutify(i, base_url, normalize: false) }
|
87
89
|
end
|
88
90
|
|
89
91
|
def parsed_images
|
@@ -47,7 +47,8 @@ module MetaInspector
|
|
47
47
|
# This can be the one set on a <base> tag,
|
48
48
|
# or the url of the document if no <base> tag was found.
|
49
49
|
def base_url
|
50
|
-
base_href
|
50
|
+
current_base_href = base_href.to_s.strip.empty? ? nil : base_href
|
51
|
+
current_base_href || url
|
51
52
|
end
|
52
53
|
|
53
54
|
# Returns the value of the href attribute on the <base /> tag, if exists
|
@@ -13,6 +13,30 @@ module MetaInspector
|
|
13
13
|
@best_title ||= find_best_title
|
14
14
|
end
|
15
15
|
|
16
|
+
def h1
|
17
|
+
@h1 ||= find_heading('h1')
|
18
|
+
end
|
19
|
+
|
20
|
+
def h2
|
21
|
+
@h2 ||= find_heading('h2')
|
22
|
+
end
|
23
|
+
|
24
|
+
def h3
|
25
|
+
@h3 ||= find_heading('h3')
|
26
|
+
end
|
27
|
+
|
28
|
+
def h4
|
29
|
+
@h4 ||= find_heading('h4')
|
30
|
+
end
|
31
|
+
|
32
|
+
def h5
|
33
|
+
@h5 ||= find_heading('h5')
|
34
|
+
end
|
35
|
+
|
36
|
+
def h6
|
37
|
+
@h6 ||= find_heading('h6')
|
38
|
+
end
|
39
|
+
|
16
40
|
# Returns the meta author, if present
|
17
41
|
def author
|
18
42
|
@author ||= meta['author']
|
@@ -45,6 +69,10 @@ module MetaInspector
|
|
45
69
|
|
46
70
|
private
|
47
71
|
|
72
|
+
def find_heading(heading)
|
73
|
+
parsed.css(heading).map { |tag| tag.inner_text.strip.gsub(/\s+/, ' ') }.reject(&:empty?)
|
74
|
+
end
|
75
|
+
|
48
76
|
# Look for candidates per list of priority
|
49
77
|
def find_best_title
|
50
78
|
candidates = [
|
@@ -48,7 +48,7 @@ module MetaInspector
|
|
48
48
|
@response ||= fetch
|
49
49
|
rescue Faraday::TimeoutError => e
|
50
50
|
raise MetaInspector::TimeoutError.new(e)
|
51
|
-
rescue Faraday::
|
51
|
+
rescue Faraday::ConnectionFailed, Faraday::SSLError, URI::InvalidURIError, FaradayMiddleware::RedirectLimitReached => e
|
52
52
|
raise MetaInspector::RequestError.new(e)
|
53
53
|
end
|
54
54
|
|
data/lib/meta_inspector/url.rb
CHANGED
@@ -5,7 +5,7 @@ module MetaInspector
|
|
5
5
|
attr_reader :url
|
6
6
|
|
7
7
|
def initialize(initial_url, options = {})
|
8
|
-
options = defaults.merge(options)
|
8
|
+
options = self.class.defaults.merge(options)
|
9
9
|
|
10
10
|
@normalize = options[:normalize]
|
11
11
|
|
@@ -56,11 +56,13 @@ module MetaInspector
|
|
56
56
|
# http:, ftp:, telnet:, mailto:, javascript: ...
|
57
57
|
# Protocol-relative URLs are also resolved to use the same
|
58
58
|
# schema as the base_url
|
59
|
-
def self.absolutify(url, base_url)
|
59
|
+
def self.absolutify(url, base_url, options = {})
|
60
|
+
options = defaults.merge(options)
|
60
61
|
if url =~ /^\w*\:/i
|
61
|
-
MetaInspector::URL.new(url).url
|
62
|
+
MetaInspector::URL.new(url, options).url
|
62
63
|
else
|
63
|
-
Addressable::URI.join(base_url, url)
|
64
|
+
uri = Addressable::URI.join(base_url, url)
|
65
|
+
options[:normalize] ? uri.normalize.to_s : uri.to_s
|
64
66
|
end
|
65
67
|
rescue MetaInspector::ParserError, Addressable::URI::InvalidURIError, ArgumentError
|
66
68
|
nil
|
@@ -68,7 +70,7 @@ module MetaInspector
|
|
68
70
|
|
69
71
|
private
|
70
72
|
|
71
|
-
def defaults
|
73
|
+
def self.defaults
|
72
74
|
{ :normalize => true }
|
73
75
|
end
|
74
76
|
|
data/meta_inspector.gemspec
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
require File.expand_path('../lib/meta_inspector/version', __FILE__)
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
|
-
gem.
|
5
|
-
gem.email =
|
4
|
+
gem.author = "Jaime Iniesta"
|
5
|
+
gem.email = "jaimeiniesta@gmail.com"
|
6
6
|
gem.description = %q{MetaInspector lets you scrape a web page and get its links, images, texts, meta tags...}
|
7
7
|
gem.summary = %q{MetaInspector is a ruby gem for web scraping purposes, that returns metadata from a given URL}
|
8
|
-
gem.homepage = "https://github.com/
|
8
|
+
gem.homepage = "https://github.com/metainspector/metainspector"
|
9
9
|
gem.license = "MIT"
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split("\n")
|
@@ -14,20 +14,20 @@ Gem::Specification.new do |gem|
|
|
14
14
|
gem.require_paths = ["lib"]
|
15
15
|
gem.version = MetaInspector::VERSION
|
16
16
|
|
17
|
-
gem.add_dependency 'nokogiri', '~> 1.
|
18
|
-
gem.add_dependency 'faraday', '~> 0.
|
19
|
-
gem.add_dependency 'faraday_middleware', '~> 0.
|
20
|
-
gem.add_dependency 'faraday-cookie_jar', '~> 0.0'
|
21
|
-
gem.add_dependency 'faraday-http-cache', '~> 2.0'
|
22
|
-
gem.add_dependency 'faraday-encoding', '~> 0.0'
|
23
|
-
gem.add_dependency 'addressable', '~> 2.
|
24
|
-
gem.add_dependency 'fastimage', '~> 2.1'
|
25
|
-
gem.add_dependency 'nesty', '~> 1.0'
|
17
|
+
gem.add_dependency 'nokogiri', '~> 1.10.9'
|
18
|
+
gem.add_dependency 'faraday', '~> 1.0.0'
|
19
|
+
gem.add_dependency 'faraday_middleware', '~> 1.0.0'
|
20
|
+
gem.add_dependency 'faraday-cookie_jar', '~> 0.0.6'
|
21
|
+
gem.add_dependency 'faraday-http-cache', '~> 2.2.0'
|
22
|
+
gem.add_dependency 'faraday-encoding', '~> 0.0.5'
|
23
|
+
gem.add_dependency 'addressable', '~> 2.7.0'
|
24
|
+
gem.add_dependency 'fastimage', '~> 2.1.7'
|
25
|
+
gem.add_dependency 'nesty', '~> 1.0.2'
|
26
26
|
|
27
|
-
gem.add_development_dependency 'rspec', '~> 3.0'
|
28
|
-
gem.add_development_dependency 'webmock'
|
29
|
-
gem.add_development_dependency 'awesome_print'
|
30
|
-
gem.add_development_dependency 'rake', '~>
|
31
|
-
gem.add_development_dependency 'pry'
|
32
|
-
gem.add_development_dependency 'rubocop'
|
27
|
+
gem.add_development_dependency 'rspec', '~> 3.9.0'
|
28
|
+
gem.add_development_dependency 'webmock', '~> 3.8.3'
|
29
|
+
gem.add_development_dependency 'awesome_print', '~> 1.8.0'
|
30
|
+
gem.add_development_dependency 'rake', '~> 13.0.1'
|
31
|
+
gem.add_development_dependency 'pry', '~> 0.13.1'
|
32
|
+
gem.add_development_dependency 'rubocop', '~> 0.82.0'
|
33
33
|
end
|