metainspector 5.8.0 → 5.11.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -4
- data/CHANGELOG.md +22 -0
- data/README.md +5 -2
- data/lib/meta_inspector/document.rb +2 -1
- data/lib/meta_inspector/parser.rb +1 -1
- data/lib/meta_inspector/parsers/head_links.rb +21 -8
- data/lib/meta_inspector/parsers/links.rb +2 -1
- data/lib/meta_inspector/request.rb +1 -1
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +12 -12
- data/spec/document_spec.rb +1 -0
- data/spec/fixtures/feeds.response +23 -0
- data/spec/fixtures/relative_links_with_empty_base.response +22 -0
- data/spec/meta_inspector/head_links_spec.rb +4 -1
- data/spec/meta_inspector/links_spec.rb +35 -11
- data/spec/spec_helper.rb +2 -2
- metadata +28 -29
- data/spec/fixtures/iteh.at.response +0 -971
- data/spec/fixtures/tea-tron.com.response +0 -957
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 210e3ca023facc1a803bd66ab164c0c3f2669a1e64483807ec3ea500d2f907e9
|
4
|
+
data.tar.gz: 551f617c3a548856d4bcf3ea9ebff284ba61604b0c75e2a94db6e4606d570477
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad519a6c1b3e41dd95d37a44e5006970ce0281b917843d9845a4105219a2bf08d91d1ebe2462f3cee44c0fab819218adf441e954d35acbb58c255992d14513c9
|
7
|
+
data.tar.gz: 2ea5ffacad5a1e62145d604706407b7b7fa3a695ec839db21c92eaa7608c7e81544c728a385211c53d59712fe129feaf958693d341f1fc3e12449572a098b1eb
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,27 @@
|
|
1
1
|
# MetaInspector Changelog
|
2
2
|
|
3
|
+
## [Changes in 5.11.0](https://github.com/jaimeiniesta/metainspector/compare/v5.10.1...v5.11.0)
|
4
|
+
|
5
|
+
* Upgrade to Faraday 1.1.
|
6
|
+
|
7
|
+
## [Changes in 5.10.1](https://github.com/jaimeiniesta/metainspector/compare/v5.10.0...v5.10.1)
|
8
|
+
|
9
|
+
* Fix for empty base_href. Makes relative links work when base_href is not nil but empty ("").
|
10
|
+
* Drop support for Ruby 2.4, add support for Ruby 2.7.
|
11
|
+
|
12
|
+
## [Changes in 5.10](https://github.com/jaimeiniesta/metainspector/compare/v5.9.0...v5.10.0)
|
13
|
+
|
14
|
+
* Upgrade to Faraday 1.0.
|
15
|
+
|
16
|
+
## [Changes in 5.9](https://github.com/jaimeiniesta/metainspector/compare/v5.8.0...v5.9.0)
|
17
|
+
|
18
|
+
* Added #feeds method to retrieve all feeds of a page.
|
19
|
+
* Adds deprecation warning on #feed method.
|
20
|
+
|
21
|
+
## [Changes in 5.8](https://github.com/jaimeiniesta/metainspector/compare/v5.7.0...v5.8.0)
|
22
|
+
|
23
|
+
* Added h1..h6 support.
|
24
|
+
|
3
25
|
## [Changes in 5.7](https://github.com/jaimeiniesta/metainspector/compare/v5.6.0...v5.7.0)
|
4
26
|
|
5
27
|
* Avoids normalizing image URLs. https://github.com/jaimeiniesta/metainspector/pull/241
|
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
# MetaInspector
|
1
|
+
# MetaInspector
|
2
|
+
[![Gem Version](https://badge.fury.io/rb/metainspector.svg)](http://badge.fury.io/rb/metainspector) [![Build Status](https://secure.travis-ci.org/jaimeiniesta/metainspector.png)](http://travis-ci.org/jaimeiniesta/metainspector) [![SemVer](https://api.dependabot.com/badges/compatibility_score?dependency-name=metainspector&package-manager=bundler&version-scheme=semver)](https://dependabot.com/compatibility-score.html?dependency-name=metainspector&package-manager=bundler&version-scheme=semver) [![Code Climate](https://codeclimate.com/github/jaimeiniesta/metainspector/badges/gpa.svg)](https://codeclimate.com/github/jaimeiniesta/metainspector) [![Mentioned in Awesome Ruby](https://awesome.re/mentioned-badge.svg)](https://github.com/markets/awesome-ruby)
|
2
3
|
|
3
4
|
MetaInspector is a gem for web scraping purposes.
|
4
5
|
|
@@ -22,6 +23,8 @@ If you're using it on a Rails application, just add it to your Gemfile and run `
|
|
22
23
|
gem 'metainspector'
|
23
24
|
```
|
24
25
|
|
26
|
+
Supported Ruby versions are defined in [`.travis.yml`](.travis.yml).
|
27
|
+
|
25
28
|
## Usage
|
26
29
|
|
27
30
|
Initialize a MetaInspector instance for a URL, like this:
|
@@ -73,7 +76,7 @@ page.root_url # Root url (scheme + host, like http://sitevalidator.co
|
|
73
76
|
page.head_links # an array of hashes of all head/links
|
74
77
|
page.stylesheets # an array of hashes of all head/links where rel='stylesheet'
|
75
78
|
page.canonicals # an array of hashes of all head/links where rel='canonical'
|
76
|
-
page.
|
79
|
+
page.feeds # Get the RSS, Atom or JSON feed links declared in the page metadata, as an array of hashes in the form { href: "...", title: "...", type: "..." }
|
77
80
|
```
|
78
81
|
|
79
82
|
### Texts
|
@@ -49,7 +49,7 @@ module MetaInspector
|
|
49
49
|
|
50
50
|
delegate [:parsed, :title, :best_title, :author, :best_author,
|
51
51
|
:h1, :h2, :h3, :h4, :h5, :h6, :description, :best_description, :links,
|
52
|
-
:images, :feed, :charset, :meta_tags,
|
52
|
+
:images, :feeds, :feed, :charset, :meta_tags,
|
53
53
|
:meta_tag, :meta, :favicon,
|
54
54
|
:head_links, :stylesheets, :canonicals] => :@parser
|
55
55
|
|
@@ -76,6 +76,7 @@ module MetaInspector
|
|
76
76
|
'images' => images.to_a,
|
77
77
|
'charset' => charset,
|
78
78
|
'feed' => feed,
|
79
|
+
'feeds' => feeds,
|
79
80
|
'content_type' => content_type,
|
80
81
|
'meta_tags' => meta_tags,
|
81
82
|
'favicon' => images.favicon,
|
@@ -23,7 +23,7 @@ module MetaInspector
|
|
23
23
|
extend Forwardable
|
24
24
|
delegate [:url, :scheme, :host] => :@document
|
25
25
|
delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
|
26
|
-
delegate [:head_links, :stylesheets, :canonicals, :feed]
|
26
|
+
delegate [:head_links, :stylesheets, :canonicals, :feeds, :feed] => :@head_links_parser
|
27
27
|
delegate [:links, :base_url] => :@links_parser
|
28
28
|
delegate :images => :@images_parser
|
29
29
|
delegate [:title, :best_title, :author, :best_author, :description, :best_description,
|
@@ -3,6 +3,10 @@ module MetaInspector
|
|
3
3
|
class HeadLinksParser < Base
|
4
4
|
delegate [:parsed, :base_url] => :@main_parser
|
5
5
|
|
6
|
+
KNOWN_FEED_TYPES = %w[
|
7
|
+
application/rss+xml application/atom+xml application/json
|
8
|
+
].freeze
|
9
|
+
|
6
10
|
def head_links
|
7
11
|
@head_links ||= parsed.css('head link').map do |tag|
|
8
12
|
Hash[
|
@@ -24,16 +28,25 @@ module MetaInspector
|
|
24
28
|
@canonicals ||= head_links.select { |hl| hl[:rel] == 'canonical' }
|
25
29
|
end
|
26
30
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
def feeds
|
32
|
+
@feeds ||=
|
33
|
+
parsed.search("//link[@rel='alternate']").map do |link|
|
34
|
+
next if !KNOWN_FEED_TYPES.include?(link["type"]) || link["href"].to_s.strip == ''
|
31
35
|
|
32
|
-
|
36
|
+
{
|
37
|
+
title: link["title"],
|
38
|
+
href: URL.absolutify(link["href"], base_url),
|
39
|
+
type: link["type"]
|
40
|
+
}
|
41
|
+
end.compact
|
42
|
+
end
|
33
43
|
|
34
|
-
def
|
35
|
-
|
36
|
-
feed
|
44
|
+
def feed
|
45
|
+
warn "DEPRECATION: Use MetaInspector#feeds instead of #feed. The former gives you all feeds and their metadata, the latter will be removed."
|
46
|
+
@feed ||= begin
|
47
|
+
first_feed = feeds.find { |l| /\/(rss|atom)\+xml$/i =~ l[:type] } || {}
|
48
|
+
first_feed[:href]
|
49
|
+
end
|
37
50
|
end
|
38
51
|
end
|
39
52
|
end
|
@@ -47,7 +47,8 @@ module MetaInspector
|
|
47
47
|
# This can be the one set on a <base> tag,
|
48
48
|
# or the url of the document if no <base> tag was found.
|
49
49
|
def base_url
|
50
|
-
base_href
|
50
|
+
current_base_href = base_href.to_s.strip.empty? ? nil : base_href
|
51
|
+
current_base_href || url
|
51
52
|
end
|
52
53
|
|
53
54
|
# Returns the value of the href attribute on the <base /> tag, if exists
|
@@ -48,7 +48,7 @@ module MetaInspector
|
|
48
48
|
@response ||= fetch
|
49
49
|
rescue Faraday::TimeoutError => e
|
50
50
|
raise MetaInspector::TimeoutError.new(e)
|
51
|
-
rescue Faraday::
|
51
|
+
rescue Faraday::ConnectionFailed, Faraday::SSLError, URI::InvalidURIError, FaradayMiddleware::RedirectLimitReached => e
|
52
52
|
raise MetaInspector::RequestError.new(e)
|
53
53
|
end
|
54
54
|
|
data/meta_inspector.gemspec
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
require File.expand_path('../lib/meta_inspector/version', __FILE__)
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
|
-
gem.
|
5
|
-
gem.email =
|
4
|
+
gem.author = "Jaime Iniesta"
|
5
|
+
gem.email = "jaimeiniesta@gmail.com"
|
6
6
|
gem.description = %q{MetaInspector lets you scrape a web page and get its links, images, texts, meta tags...}
|
7
7
|
gem.summary = %q{MetaInspector is a ruby gem for web scraping purposes, that returns metadata from a given URL}
|
8
|
-
gem.homepage = "https://github.com/
|
8
|
+
gem.homepage = "https://github.com/metainspector/metainspector"
|
9
9
|
gem.license = "MIT"
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split("\n")
|
@@ -14,20 +14,20 @@ Gem::Specification.new do |gem|
|
|
14
14
|
gem.require_paths = ["lib"]
|
15
15
|
gem.version = MetaInspector::VERSION
|
16
16
|
|
17
|
-
gem.add_dependency 'nokogiri', '~> 1.
|
18
|
-
gem.add_dependency 'faraday', '~>
|
19
|
-
gem.add_dependency 'faraday_middleware', '~> 0.
|
20
|
-
gem.add_dependency 'faraday-cookie_jar', '~> 0.0.
|
21
|
-
gem.add_dependency 'faraday-http-cache', '~> 2.
|
17
|
+
gem.add_dependency 'nokogiri', '~> 1.11.0'
|
18
|
+
gem.add_dependency 'faraday', '~> 1.1.0'
|
19
|
+
gem.add_dependency 'faraday_middleware', '~> 1.0.0'
|
20
|
+
gem.add_dependency 'faraday-cookie_jar', '~> 0.0.7'
|
21
|
+
gem.add_dependency 'faraday-http-cache', '~> 2.2.0'
|
22
22
|
gem.add_dependency 'faraday-encoding', '~> 0.0.5'
|
23
23
|
gem.add_dependency 'addressable', '~> 2.7.0'
|
24
24
|
gem.add_dependency 'fastimage', '~> 2.1.7'
|
25
25
|
gem.add_dependency 'nesty', '~> 1.0.2'
|
26
26
|
|
27
27
|
gem.add_development_dependency 'rspec', '~> 3.9.0'
|
28
|
-
gem.add_development_dependency 'webmock', '~> 3.
|
28
|
+
gem.add_development_dependency 'webmock', '~> 3.8.3'
|
29
29
|
gem.add_development_dependency 'awesome_print', '~> 1.8.0'
|
30
|
-
gem.add_development_dependency 'rake', '~> 13.0.
|
31
|
-
gem.add_development_dependency 'pry', '~> 0.
|
32
|
-
gem.add_development_dependency 'rubocop', '~> 0.
|
30
|
+
gem.add_development_dependency 'rake', '~> 13.0.1'
|
31
|
+
gem.add_development_dependency 'pry', '~> 0.13.1'
|
32
|
+
gem.add_development_dependency 'rubocop', '~> 0.82.0'
|
33
33
|
end
|
data/spec/document_spec.rb
CHANGED
@@ -44,6 +44,7 @@ describe MetaInspector::Document do
|
|
44
44
|
"images" => ["http://pagerankalert.com/images/pagerank_alert.png?1305794559"],
|
45
45
|
"charset" => "utf-8",
|
46
46
|
"feed" => "http://feeds.feedburner.com/PageRankAlert",
|
47
|
+
"feeds" => [{href: "http://feeds.feedburner.com/PageRankAlert", title: "PageRankAlert.com blog", type: "application/rss+xml"}],
|
47
48
|
"h1" => [],
|
48
49
|
"h2" => ["Track your PageRank changes"],
|
49
50
|
"h3" => ["WHAT'S YOUR PAGERANK?"],
|
@@ -0,0 +1,23 @@
|
|
1
|
+
HTTP/1.1 200
|
2
|
+
date: Wed, 08 Jan 2020 23:21:58 GMT
|
3
|
+
content-type: text/html; charset=UTF-8
|
4
|
+
server: nginx/0.7.67
|
5
|
+
|
6
|
+
<!DOCTYPE html>
|
7
|
+
<html>
|
8
|
+
<head>
|
9
|
+
<title>a page with feeds</title>
|
10
|
+
<link rel="alternate" title="Articles - JSON Feed" type="application/json" href="https://example.org/feed.json" />
|
11
|
+
<link rel="alternate" title="Comments - JSON Feed" type="application/json" href="https://example.org/feed/comments.json" />
|
12
|
+
<link rel="alternate" title="Articles - RSS Feed" type="application/rss+xml" href="https://example.org/feed.rss" />
|
13
|
+
<link rel="alternate" title="Comments - RSS Feed" type="application/rss+xml" href="https://example.org/feed/comments.rss" />
|
14
|
+
<link rel="alternate" title="Articles - Atom Feed" type="application/atom+xml" href="https://example.org/feed.xml" />
|
15
|
+
<link rel="alternate" title="Comments - Atom Feed" type="application/atom+xml" href="https://example.org/feed/comments.xml" />
|
16
|
+
|
17
|
+
<link rel="alternate" title="Invalid Feed" />
|
18
|
+
<link rel="alternate" title="Feed with empty href" type="application/atom+xml" href="" />
|
19
|
+
</head>
|
20
|
+
<body>
|
21
|
+
|
22
|
+
</body>
|
23
|
+
</html>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/1.0.5
|
3
|
+
Date: Thu, 29 Dec 2011 23:10:13 GMT
|
4
|
+
Content-Type: text/html
|
5
|
+
Content-Length: 15013
|
6
|
+
Last-Modified: Fri, 02 Dec 2011 21:00:49 GMT
|
7
|
+
Connection: keep-alive
|
8
|
+
Accept-Ranges: bytes
|
9
|
+
|
10
|
+
<!DOCTYPE html>
|
11
|
+
<html>
|
12
|
+
<head>
|
13
|
+
<base href=""/>
|
14
|
+
<meta charset="utf-8" />
|
15
|
+
<title>Relative links</title>
|
16
|
+
</head>
|
17
|
+
<body>
|
18
|
+
<p>Relative links</p>
|
19
|
+
<a href="about">About</a>
|
20
|
+
<a href="../sitemap">Sitemap</a>
|
21
|
+
</body>
|
22
|
+
</html>
|
@@ -39,7 +39,10 @@ describe MetaInspector do
|
|
39
39
|
context "on page with some broken feed links" do
|
40
40
|
let(:page){ MetaInspector.new('http://example.com/broken_head_links') }
|
41
41
|
it "tries to find correct one" do
|
42
|
-
|
42
|
+
expected = [
|
43
|
+
{ title: "TechCrunch RSS feed", href: "http://www.guardian.co.uk/media/techcrunch/rss", type: "application/rss+xml" }
|
44
|
+
]
|
45
|
+
expect(page.feeds).to eq(expected)
|
43
46
|
end
|
44
47
|
end
|
45
48
|
end
|
@@ -145,6 +145,13 @@ describe MetaInspector do
|
|
145
145
|
end
|
146
146
|
end
|
147
147
|
|
148
|
+
describe 'Relative links with empty or blank base' do
|
149
|
+
it 'should get the relative links from a document' do
|
150
|
+
m = MetaInspector.new('http://relativewithemptybase.com/company')
|
151
|
+
expect(m.links.internal).to eq(['http://relativewithemptybase.com/about', 'http://relativewithemptybase.com/sitemap'])
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
148
155
|
describe 'Relative links with base' do
|
149
156
|
it 'should get the relative links from a document' do
|
150
157
|
m = MetaInspector.new('http://relativewithbase.com/company/page2')
|
@@ -190,20 +197,37 @@ describe MetaInspector do
|
|
190
197
|
end
|
191
198
|
end
|
192
199
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
200
|
+
context "Feeds" do
|
201
|
+
let(:meta) { MetaInspector.new('http://feeds.example.com') }
|
202
|
+
|
203
|
+
describe "#feeds" do
|
204
|
+
it "should return all the document's feeds" do
|
205
|
+
expected = [
|
206
|
+
{ title: "Articles - JSON Feed", href: "https://example.org/feed.json", type: "application/json" },
|
207
|
+
{ title: "Comments - JSON Feed", href: "https://example.org/feed/comments.json", type: "application/json" },
|
208
|
+
{ title: "Articles - RSS Feed", href: "https://example.org/feed.rss", type: "application/rss+xml" },
|
209
|
+
{ title: "Comments - RSS Feed", href: "https://example.org/feed/comments.rss", type: "application/rss+xml" },
|
210
|
+
{ title: "Articles - Atom Feed", href: "https://example.org/feed.xml", type: "application/atom+xml" },
|
211
|
+
{ title: "Comments - Atom Feed", href: "https://example.org/feed/comments.xml", type: "application/atom+xml" }
|
212
|
+
]
|
213
|
+
expect(meta.feeds).to eq(expected)
|
214
|
+
end
|
198
215
|
|
199
|
-
|
200
|
-
|
201
|
-
|
216
|
+
it "should return nothing if no feeds found" do
|
217
|
+
@m = MetaInspector.new('http://www.alazan.com')
|
218
|
+
expect(@m.feeds).to eq([])
|
219
|
+
end
|
202
220
|
end
|
203
221
|
|
204
|
-
|
205
|
-
|
206
|
-
|
222
|
+
describe "#feed" do
|
223
|
+
it "should return the first feed's href" do
|
224
|
+
expect(meta.feed).to eq("https://example.org/feed.rss")
|
225
|
+
end
|
226
|
+
|
227
|
+
it "should give a deprecation warning" do
|
228
|
+
warning = "DEPRECATION: Use MetaInspector#feeds instead of #feed. The former gives you all feeds and their metadata, the latter will be removed.\n"
|
229
|
+
expect { meta.feed }.to output(warning).to_stderr
|
230
|
+
end
|
207
231
|
end
|
208
232
|
end
|
209
233
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -65,12 +65,11 @@ RSpec.configure do |config|
|
|
65
65
|
stub_request(:get, "http://relativewithbase.com/").to_return(fixture_file("relative_links_with_base.response"))
|
66
66
|
stub_request(:get, "http://relativewithbase.com/company/page2").to_return(fixture_file("relative_links_with_base.response"))
|
67
67
|
stub_request(:get, "http://relativewithbase.com/company/page2/").to_return(fixture_file("relative_links_with_base.response"))
|
68
|
+
stub_request(:get, "http://relativewithemptybase.com/company").to_return(fixture_file("relative_links_with_empty_base.response"))
|
68
69
|
stub_request(:get, "http://theonion-no-description.com").to_return(fixture_file("theonion-no-description.com.response"))
|
69
70
|
stub_request(:get, "http://www.24-horas.mx/mexico-firma-acuerdo-bilateral-automotriz-con-argentina/").to_return(fixture_file("relative_og_image.response"))
|
70
71
|
stub_request(:get, "http://www.alazan.com").to_return(fixture_file("alazan.com.response"))
|
71
72
|
stub_request(:get, "http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups").to_return(fixture_file("guardian.co.uk.response"))
|
72
|
-
stub_request(:get, "http://www.iteh.at").to_return(fixture_file("iteh.at.response"))
|
73
|
-
stub_request(:get, "http://www.tea-tron.com/jbravo/blog/").to_return(fixture_file("tea-tron.com.response"))
|
74
73
|
stub_request(:get, "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/").to_return(fixture_file("theonion.com.response"))
|
75
74
|
stub_request(:get, "http://www.youtube.com/watch?v=iaGSSrp49uc").to_return(fixture_file("youtube.response"))
|
76
75
|
stub_request(:get, "http://www.youtube.com/watch?v=short_title").to_return(fixture_file("youtube_short_title.response"))
|
@@ -79,5 +78,6 @@ RSpec.configure do |config|
|
|
79
78
|
stub_request(:get, "https://twitter.com/markupvalidator").to_return(fixture_file("twitter_markupvalidator.response"))
|
80
79
|
stub_request(:get, "https://www.facebook.com/").to_return(fixture_file("https.facebook.com.response"))
|
81
80
|
stub_request(:get, "http://example.com/meta_tags_empty").to_return(fixture_file("meta_tags_empty.response"))
|
81
|
+
stub_request(:get, "http://feeds.example.com").to_return(fixture_file("feeds.response"))
|
82
82
|
end
|
83
83
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -16,70 +16,70 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.
|
19
|
+
version: 1.11.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.
|
26
|
+
version: 1.11.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: faraday
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 1.1.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 1.1.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: faraday_middleware
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.
|
47
|
+
version: 1.0.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.
|
54
|
+
version: 1.0.0
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: faraday-cookie_jar
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.0.
|
61
|
+
version: 0.0.7
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.0.
|
68
|
+
version: 0.0.7
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: faraday-http-cache
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 2.
|
75
|
+
version: 2.2.0
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 2.
|
82
|
+
version: 2.2.0
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: faraday-encoding
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -156,14 +156,14 @@ dependencies:
|
|
156
156
|
requirements:
|
157
157
|
- - "~>"
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version: 3.
|
159
|
+
version: 3.8.3
|
160
160
|
type: :development
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
164
|
- - "~>"
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version: 3.
|
166
|
+
version: 3.8.3
|
167
167
|
- !ruby/object:Gem::Dependency
|
168
168
|
name: awesome_print
|
169
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -184,46 +184,45 @@ dependencies:
|
|
184
184
|
requirements:
|
185
185
|
- - "~>"
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version: 13.0.
|
187
|
+
version: 13.0.1
|
188
188
|
type: :development
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
192
|
- - "~>"
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: 13.0.
|
194
|
+
version: 13.0.1
|
195
195
|
- !ruby/object:Gem::Dependency
|
196
196
|
name: pry
|
197
197
|
requirement: !ruby/object:Gem::Requirement
|
198
198
|
requirements:
|
199
199
|
- - "~>"
|
200
200
|
- !ruby/object:Gem::Version
|
201
|
-
version: 0.
|
201
|
+
version: 0.13.1
|
202
202
|
type: :development
|
203
203
|
prerelease: false
|
204
204
|
version_requirements: !ruby/object:Gem::Requirement
|
205
205
|
requirements:
|
206
206
|
- - "~>"
|
207
207
|
- !ruby/object:Gem::Version
|
208
|
-
version: 0.
|
208
|
+
version: 0.13.1
|
209
209
|
- !ruby/object:Gem::Dependency
|
210
210
|
name: rubocop
|
211
211
|
requirement: !ruby/object:Gem::Requirement
|
212
212
|
requirements:
|
213
213
|
- - "~>"
|
214
214
|
- !ruby/object:Gem::Version
|
215
|
-
version: 0.
|
215
|
+
version: 0.82.0
|
216
216
|
type: :development
|
217
217
|
prerelease: false
|
218
218
|
version_requirements: !ruby/object:Gem::Requirement
|
219
219
|
requirements:
|
220
220
|
- - "~>"
|
221
221
|
- !ruby/object:Gem::Version
|
222
|
-
version: 0.
|
222
|
+
version: 0.82.0
|
223
223
|
description: MetaInspector lets you scrape a web page and get its links, images, texts,
|
224
224
|
meta tags...
|
225
|
-
email:
|
226
|
-
- jaimeiniesta@gmail.com
|
225
|
+
email: jaimeiniesta@gmail.com
|
227
226
|
executables: []
|
228
227
|
extensions: []
|
229
228
|
extra_rdoc_files: []
|
@@ -278,6 +277,7 @@ files:
|
|
278
277
|
- spec/fixtures/encoding.response
|
279
278
|
- spec/fixtures/example.response
|
280
279
|
- spec/fixtures/facebook.com.response
|
280
|
+
- spec/fixtures/feeds.response
|
281
281
|
- spec/fixtures/guardian.co.uk.response
|
282
282
|
- spec/fixtures/head_links.response
|
283
283
|
- spec/fixtures/headings.response
|
@@ -286,7 +286,6 @@ files:
|
|
286
286
|
- spec/fixtures/invalid_byte_seq.response
|
287
287
|
- spec/fixtures/invalid_href.response
|
288
288
|
- spec/fixtures/invalid_utf8_byte_seq.response
|
289
|
-
- spec/fixtures/iteh.at.response
|
290
289
|
- spec/fixtures/largest_image_in_html.response
|
291
290
|
- spec/fixtures/largest_image_using_image_size.response
|
292
291
|
- spec/fixtures/malformed_href.response
|
@@ -303,8 +302,8 @@ files:
|
|
303
302
|
- spec/fixtures/protocol_relative.response
|
304
303
|
- spec/fixtures/relative_links.response
|
305
304
|
- spec/fixtures/relative_links_with_base.response
|
305
|
+
- spec/fixtures/relative_links_with_empty_base.response
|
306
306
|
- spec/fixtures/relative_og_image.response
|
307
|
-
- spec/fixtures/tea-tron.com.response
|
308
307
|
- spec/fixtures/theonion-no-description.com.response
|
309
308
|
- spec/fixtures/theonion.com.response
|
310
309
|
- spec/fixtures/title_best_choice.response
|
@@ -330,11 +329,11 @@ files:
|
|
330
329
|
- spec/request_spec.rb
|
331
330
|
- spec/spec_helper.rb
|
332
331
|
- spec/url_spec.rb
|
333
|
-
homepage: https://github.com/
|
332
|
+
homepage: https://github.com/metainspector/metainspector
|
334
333
|
licenses:
|
335
334
|
- MIT
|
336
335
|
metadata: {}
|
337
|
-
post_install_message:
|
336
|
+
post_install_message:
|
338
337
|
rdoc_options: []
|
339
338
|
require_paths:
|
340
339
|
- lib
|
@@ -349,8 +348,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
349
348
|
- !ruby/object:Gem::Version
|
350
349
|
version: '0'
|
351
350
|
requirements: []
|
352
|
-
rubygems_version: 3.
|
353
|
-
signing_key:
|
351
|
+
rubygems_version: 3.1.2
|
352
|
+
signing_key:
|
354
353
|
specification_version: 4
|
355
354
|
summary: MetaInspector is a ruby gem for web scraping purposes, that returns metadata
|
356
355
|
from a given URL
|