metainspector 5.8.0 → 5.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -4
- data/CHANGELOG.md +22 -0
- data/README.md +5 -2
- data/lib/meta_inspector/document.rb +2 -1
- data/lib/meta_inspector/parser.rb +1 -1
- data/lib/meta_inspector/parsers/head_links.rb +21 -8
- data/lib/meta_inspector/parsers/links.rb +2 -1
- data/lib/meta_inspector/request.rb +1 -1
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +12 -12
- data/spec/document_spec.rb +1 -0
- data/spec/fixtures/feeds.response +23 -0
- data/spec/fixtures/relative_links_with_empty_base.response +22 -0
- data/spec/meta_inspector/head_links_spec.rb +4 -1
- data/spec/meta_inspector/links_spec.rb +35 -11
- data/spec/spec_helper.rb +2 -2
- metadata +28 -29
- data/spec/fixtures/iteh.at.response +0 -971
- data/spec/fixtures/tea-tron.com.response +0 -957
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 210e3ca023facc1a803bd66ab164c0c3f2669a1e64483807ec3ea500d2f907e9
|
4
|
+
data.tar.gz: 551f617c3a548856d4bcf3ea9ebff284ba61604b0c75e2a94db6e4606d570477
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad519a6c1b3e41dd95d37a44e5006970ce0281b917843d9845a4105219a2bf08d91d1ebe2462f3cee44c0fab819218adf441e954d35acbb58c255992d14513c9
|
7
|
+
data.tar.gz: 2ea5ffacad5a1e62145d604706407b7b7fa3a695ec839db21c92eaa7608c7e81544c728a385211c53d59712fe129feaf958693d341f1fc3e12449572a098b1eb
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,27 @@
|
|
1
1
|
# MetaInspector Changelog
|
2
2
|
|
3
|
+
## [Changes in 5.11.0](https://github.com/jaimeiniesta/metainspector/compare/v5.10.1...v5.11.0)
|
4
|
+
|
5
|
+
* Upgrade to Faraday 1.1.
|
6
|
+
|
7
|
+
## [Changes in 5.10.1](https://github.com/jaimeiniesta/metainspector/compare/v5.10.0...v5.10.1)
|
8
|
+
|
9
|
+
* Fix for empty base_href. Makes relative links work when base_href is not nil but empty ("").
|
10
|
+
* Drop support for Ruby 2.4, add support for Ruby 2.7.
|
11
|
+
|
12
|
+
## [Changes in 5.10](https://github.com/jaimeiniesta/metainspector/compare/v5.9.0...v5.10.0)
|
13
|
+
|
14
|
+
* Upgrade to Faraday 1.0.
|
15
|
+
|
16
|
+
## [Changes in 5.9](https://github.com/jaimeiniesta/metainspector/compare/v5.8.0...v5.9.0)
|
17
|
+
|
18
|
+
* Added #feeds method to retrieve all feeds of a page.
|
19
|
+
* Adds deprecation warning on #feed method.
|
20
|
+
|
21
|
+
## [Changes in 5.8](https://github.com/jaimeiniesta/metainspector/compare/v5.7.0...v5.8.0)
|
22
|
+
|
23
|
+
* Added h1..h6 support.
|
24
|
+
|
3
25
|
## [Changes in 5.7](https://github.com/jaimeiniesta/metainspector/compare/v5.6.0...v5.7.0)
|
4
26
|
|
5
27
|
* Avoids normalizing image URLs. https://github.com/jaimeiniesta/metainspector/pull/241
|
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
# MetaInspector
|
1
|
+
# MetaInspector
|
2
|
+
[Gem Version](http://badge.fury.io/rb/metainspector) [Build Status](http://travis-ci.org/jaimeiniesta/metainspector) [Dependabot compatibility score](https://dependabot.com/compatibility-score.html?dependency-name=metainspector&package-manager=bundler&version-scheme=semver) [Code Climate](https://codeclimate.com/github/jaimeiniesta/metainspector) [Awesome Ruby](https://github.com/markets/awesome-ruby)
|
2
3
|
|
3
4
|
MetaInspector is a gem for web scraping purposes.
|
4
5
|
|
@@ -22,6 +23,8 @@ If you're using it on a Rails application, just add it to your Gemfile and run `
|
|
22
23
|
gem 'metainspector'
|
23
24
|
```
|
24
25
|
|
26
|
+
Supported Ruby versions are defined in [`.travis.yml`](.travis.yml).
|
27
|
+
|
25
28
|
## Usage
|
26
29
|
|
27
30
|
Initialize a MetaInspector instance for a URL, like this:
|
@@ -73,7 +76,7 @@ page.root_url # Root url (scheme + host, like http://sitevalidator.co
|
|
73
76
|
page.head_links # an array of hashes of all head/links
|
74
77
|
page.stylesheets # an array of hashes of all head/links where rel='stylesheet'
|
75
78
|
page.canonicals # an array of hashes of all head/links where rel='canonical'
|
76
|
-
page.
|
79
|
+
page.feeds # an array of hashes of all RSS, Atom or JSON feed links, each in the form { href: "...", title: "...", type: "..." }
|
77
80
|
```
|
78
81
|
|
79
82
|
### Texts
|
@@ -49,7 +49,7 @@ module MetaInspector
|
|
49
49
|
|
50
50
|
delegate [:parsed, :title, :best_title, :author, :best_author,
|
51
51
|
:h1, :h2, :h3, :h4, :h5, :h6, :description, :best_description, :links,
|
52
|
-
:images, :feed, :charset, :meta_tags,
|
52
|
+
:images, :feeds, :feed, :charset, :meta_tags,
|
53
53
|
:meta_tag, :meta, :favicon,
|
54
54
|
:head_links, :stylesheets, :canonicals] => :@parser
|
55
55
|
|
@@ -76,6 +76,7 @@ module MetaInspector
|
|
76
76
|
'images' => images.to_a,
|
77
77
|
'charset' => charset,
|
78
78
|
'feed' => feed,
|
79
|
+
'feeds' => feeds,
|
79
80
|
'content_type' => content_type,
|
80
81
|
'meta_tags' => meta_tags,
|
81
82
|
'favicon' => images.favicon,
|
@@ -23,7 +23,7 @@ module MetaInspector
|
|
23
23
|
extend Forwardable
|
24
24
|
delegate [:url, :scheme, :host] => :@document
|
25
25
|
delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
|
26
|
-
delegate [:head_links, :stylesheets, :canonicals, :feed]
|
26
|
+
delegate [:head_links, :stylesheets, :canonicals, :feeds, :feed] => :@head_links_parser
|
27
27
|
delegate [:links, :base_url] => :@links_parser
|
28
28
|
delegate :images => :@images_parser
|
29
29
|
delegate [:title, :best_title, :author, :best_author, :description, :best_description,
|
@@ -3,6 +3,10 @@ module MetaInspector
|
|
3
3
|
class HeadLinksParser < Base
|
4
4
|
delegate [:parsed, :base_url] => :@main_parser
|
5
5
|
|
6
|
+
KNOWN_FEED_TYPES = %w[
|
7
|
+
application/rss+xml application/atom+xml application/json
|
8
|
+
].freeze
|
9
|
+
|
6
10
|
def head_links
|
7
11
|
@head_links ||= parsed.css('head link').map do |tag|
|
8
12
|
Hash[
|
@@ -24,16 +28,25 @@ module MetaInspector
|
|
24
28
|
@canonicals ||= head_links.select { |hl| hl[:rel] == 'canonical' }
|
25
29
|
end
|
26
30
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
def feeds
|
32
|
+
@feeds ||=
|
33
|
+
parsed.search("//link[@rel='alternate']").map do |link|
|
34
|
+
next if !KNOWN_FEED_TYPES.include?(link["type"]) || link["href"].to_s.strip == ''
|
31
35
|
|
32
|
-
|
36
|
+
{
|
37
|
+
title: link["title"],
|
38
|
+
href: URL.absolutify(link["href"], base_url),
|
39
|
+
type: link["type"]
|
40
|
+
}
|
41
|
+
end.compact
|
42
|
+
end
|
33
43
|
|
34
|
-
def
|
35
|
-
|
36
|
-
feed
|
44
|
+
def feed
|
45
|
+
warn "DEPRECATION: Use MetaInspector#feeds instead of #feed. The former gives you all feeds and their metadata, the latter will be removed."
|
46
|
+
@feed ||= begin
|
47
|
+
first_feed = feeds.find { |l| /\/(rss|atom)\+xml$/i =~ l[:type] } || {}
|
48
|
+
first_feed[:href]
|
49
|
+
end
|
37
50
|
end
|
38
51
|
end
|
39
52
|
end
|
@@ -47,7 +47,8 @@ module MetaInspector
|
|
47
47
|
# This can be the one set on a <base> tag,
|
48
48
|
# or the url of the document if no <base> tag was found.
|
49
49
|
def base_url
|
50
|
-
base_href
|
50
|
+
current_base_href = base_href.to_s.strip.empty? ? nil : base_href
|
51
|
+
current_base_href || url
|
51
52
|
end
|
52
53
|
|
53
54
|
# Returns the value of the href attribute on the <base /> tag, if exists
|
@@ -48,7 +48,7 @@ module MetaInspector
|
|
48
48
|
@response ||= fetch
|
49
49
|
rescue Faraday::TimeoutError => e
|
50
50
|
raise MetaInspector::TimeoutError.new(e)
|
51
|
-
rescue Faraday::
|
51
|
+
rescue Faraday::ConnectionFailed, Faraday::SSLError, URI::InvalidURIError, FaradayMiddleware::RedirectLimitReached => e
|
52
52
|
raise MetaInspector::RequestError.new(e)
|
53
53
|
end
|
54
54
|
|
data/meta_inspector.gemspec
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
require File.expand_path('../lib/meta_inspector/version', __FILE__)
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
|
-
gem.
|
5
|
-
gem.email =
|
4
|
+
gem.author = "Jaime Iniesta"
|
5
|
+
gem.email = "jaimeiniesta@gmail.com"
|
6
6
|
gem.description = %q{MetaInspector lets you scrape a web page and get its links, images, texts, meta tags...}
|
7
7
|
gem.summary = %q{MetaInspector is a ruby gem for web scraping purposes, that returns metadata from a given URL}
|
8
|
-
gem.homepage = "https://github.com/
|
8
|
+
gem.homepage = "https://github.com/metainspector/metainspector"
|
9
9
|
gem.license = "MIT"
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split("\n")
|
@@ -14,20 +14,20 @@ Gem::Specification.new do |gem|
|
|
14
14
|
gem.require_paths = ["lib"]
|
15
15
|
gem.version = MetaInspector::VERSION
|
16
16
|
|
17
|
-
gem.add_dependency 'nokogiri', '~> 1.
|
18
|
-
gem.add_dependency 'faraday', '~>
|
19
|
-
gem.add_dependency 'faraday_middleware', '~> 0.
|
20
|
-
gem.add_dependency 'faraday-cookie_jar', '~> 0.0.
|
21
|
-
gem.add_dependency 'faraday-http-cache', '~> 2.
|
17
|
+
gem.add_dependency 'nokogiri', '~> 1.11.0'
|
18
|
+
gem.add_dependency 'faraday', '~> 1.1.0'
|
19
|
+
gem.add_dependency 'faraday_middleware', '~> 1.0.0'
|
20
|
+
gem.add_dependency 'faraday-cookie_jar', '~> 0.0.7'
|
21
|
+
gem.add_dependency 'faraday-http-cache', '~> 2.2.0'
|
22
22
|
gem.add_dependency 'faraday-encoding', '~> 0.0.5'
|
23
23
|
gem.add_dependency 'addressable', '~> 2.7.0'
|
24
24
|
gem.add_dependency 'fastimage', '~> 2.1.7'
|
25
25
|
gem.add_dependency 'nesty', '~> 1.0.2'
|
26
26
|
|
27
27
|
gem.add_development_dependency 'rspec', '~> 3.9.0'
|
28
|
-
gem.add_development_dependency 'webmock', '~> 3.
|
28
|
+
gem.add_development_dependency 'webmock', '~> 3.8.3'
|
29
29
|
gem.add_development_dependency 'awesome_print', '~> 1.8.0'
|
30
|
-
gem.add_development_dependency 'rake', '~> 13.0.
|
31
|
-
gem.add_development_dependency 'pry', '~> 0.
|
32
|
-
gem.add_development_dependency 'rubocop', '~> 0.
|
30
|
+
gem.add_development_dependency 'rake', '~> 13.0.1'
|
31
|
+
gem.add_development_dependency 'pry', '~> 0.13.1'
|
32
|
+
gem.add_development_dependency 'rubocop', '~> 0.82.0'
|
33
33
|
end
|
data/spec/document_spec.rb
CHANGED
@@ -44,6 +44,7 @@ describe MetaInspector::Document do
|
|
44
44
|
"images" => ["http://pagerankalert.com/images/pagerank_alert.png?1305794559"],
|
45
45
|
"charset" => "utf-8",
|
46
46
|
"feed" => "http://feeds.feedburner.com/PageRankAlert",
|
47
|
+
"feeds" => [{href: "http://feeds.feedburner.com/PageRankAlert", title: "PageRankAlert.com blog", type: "application/rss+xml"}],
|
47
48
|
"h1" => [],
|
48
49
|
"h2" => ["Track your PageRank changes"],
|
49
50
|
"h3" => ["WHAT'S YOUR PAGERANK?"],
|
@@ -0,0 +1,23 @@
|
|
1
|
+
HTTP/1.1 200
|
2
|
+
date: Wed, 08 Jan 2020 23:21:58 GMT
|
3
|
+
content-type: text/html; charset=UTF-8
|
4
|
+
server: nginx/0.7.67
|
5
|
+
|
6
|
+
<!DOCTYPE html>
|
7
|
+
<html>
|
8
|
+
<head>
|
9
|
+
<title>a page with feeds</title>
|
10
|
+
<link rel="alternate" title="Articles - JSON Feed" type="application/json" href="https://example.org/feed.json" />
|
11
|
+
<link rel="alternate" title="Comments - JSON Feed" type="application/json" href="https://example.org/feed/comments.json" />
|
12
|
+
<link rel="alternate" title="Articles - RSS Feed" type="application/rss+xml" href="https://example.org/feed.rss" />
|
13
|
+
<link rel="alternate" title="Comments - RSS Feed" type="application/rss+xml" href="https://example.org/feed/comments.rss" />
|
14
|
+
<link rel="alternate" title="Articles - Atom Feed" type="application/atom+xml" href="https://example.org/feed.xml" />
|
15
|
+
<link rel="alternate" title="Comments - Atom Feed" type="application/atom+xml" href="https://example.org/feed/comments.xml" />
|
16
|
+
|
17
|
+
<link rel="alternate" title="Invalid Feed" />
|
18
|
+
<link rel="alternate" title="Feed with empty href" type="application/atom+xml" href="" />
|
19
|
+
</head>
|
20
|
+
<body>
|
21
|
+
|
22
|
+
</body>
|
23
|
+
</html>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/1.0.5
|
3
|
+
Date: Thu, 29 Dec 2011 23:10:13 GMT
|
4
|
+
Content-Type: text/html
|
5
|
+
Content-Length: 15013
|
6
|
+
Last-Modified: Fri, 02 Dec 2011 21:00:49 GMT
|
7
|
+
Connection: keep-alive
|
8
|
+
Accept-Ranges: bytes
|
9
|
+
|
10
|
+
<!DOCTYPE html>
|
11
|
+
<html>
|
12
|
+
<head>
|
13
|
+
<base href=""/>
|
14
|
+
<meta charset="utf-8" />
|
15
|
+
<title>Relative links</title>
|
16
|
+
</head>
|
17
|
+
<body>
|
18
|
+
<p>Relative links</p>
|
19
|
+
<a href="about">About</a>
|
20
|
+
<a href="../sitemap">Sitemap</a>
|
21
|
+
</body>
|
22
|
+
</html>
|
@@ -39,7 +39,10 @@ describe MetaInspector do
|
|
39
39
|
context "on page with some broken feed links" do
|
40
40
|
let(:page){ MetaInspector.new('http://example.com/broken_head_links') }
|
41
41
|
it "tries to find correct one" do
|
42
|
-
|
42
|
+
expected = [
|
43
|
+
{ title: "TechCrunch RSS feed", href: "http://www.guardian.co.uk/media/techcrunch/rss", type: "application/rss+xml" }
|
44
|
+
]
|
45
|
+
expect(page.feeds).to eq(expected)
|
43
46
|
end
|
44
47
|
end
|
45
48
|
end
|
@@ -145,6 +145,13 @@ describe MetaInspector do
|
|
145
145
|
end
|
146
146
|
end
|
147
147
|
|
148
|
+
describe 'Relative links with empty or blank base' do
|
149
|
+
it 'should get the relative links from a document' do
|
150
|
+
m = MetaInspector.new('http://relativewithemptybase.com/company')
|
151
|
+
expect(m.links.internal).to eq(['http://relativewithemptybase.com/about', 'http://relativewithemptybase.com/sitemap'])
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
148
155
|
describe 'Relative links with base' do
|
149
156
|
it 'should get the relative links from a document' do
|
150
157
|
m = MetaInspector.new('http://relativewithbase.com/company/page2')
|
@@ -190,20 +197,37 @@ describe MetaInspector do
|
|
190
197
|
end
|
191
198
|
end
|
192
199
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
200
|
+
context "Feeds" do
|
201
|
+
let(:meta) { MetaInspector.new('http://feeds.example.com') }
|
202
|
+
|
203
|
+
describe "#feeds" do
|
204
|
+
it "should return all the document's feeds" do
|
205
|
+
expected = [
|
206
|
+
{ title: "Articles - JSON Feed", href: "https://example.org/feed.json", type: "application/json" },
|
207
|
+
{ title: "Comments - JSON Feed", href: "https://example.org/feed/comments.json", type: "application/json" },
|
208
|
+
{ title: "Articles - RSS Feed", href: "https://example.org/feed.rss", type: "application/rss+xml" },
|
209
|
+
{ title: "Comments - RSS Feed", href: "https://example.org/feed/comments.rss", type: "application/rss+xml" },
|
210
|
+
{ title: "Articles - Atom Feed", href: "https://example.org/feed.xml", type: "application/atom+xml" },
|
211
|
+
{ title: "Comments - Atom Feed", href: "https://example.org/feed/comments.xml", type: "application/atom+xml" }
|
212
|
+
]
|
213
|
+
expect(meta.feeds).to eq(expected)
|
214
|
+
end
|
198
215
|
|
199
|
-
|
200
|
-
|
201
|
-
|
216
|
+
it "should return nothing if no feeds found" do
|
217
|
+
@m = MetaInspector.new('http://www.alazan.com')
|
218
|
+
expect(@m.feeds).to eq([])
|
219
|
+
end
|
202
220
|
end
|
203
221
|
|
204
|
-
|
205
|
-
|
206
|
-
|
222
|
+
describe "#feed" do
|
223
|
+
it "should return the first feed's href" do
|
224
|
+
expect(meta.feed).to eq("https://example.org/feed.rss")
|
225
|
+
end
|
226
|
+
|
227
|
+
it "should give a deprecation warning" do
|
228
|
+
warning = "DEPRECATION: Use MetaInspector#feeds instead of #feed. The former gives you all feeds and their metadata, the latter will be removed.\n"
|
229
|
+
expect { meta.feed }.to output(warning).to_stderr
|
230
|
+
end
|
207
231
|
end
|
208
232
|
end
|
209
233
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -65,12 +65,11 @@ RSpec.configure do |config|
|
|
65
65
|
stub_request(:get, "http://relativewithbase.com/").to_return(fixture_file("relative_links_with_base.response"))
|
66
66
|
stub_request(:get, "http://relativewithbase.com/company/page2").to_return(fixture_file("relative_links_with_base.response"))
|
67
67
|
stub_request(:get, "http://relativewithbase.com/company/page2/").to_return(fixture_file("relative_links_with_base.response"))
|
68
|
+
stub_request(:get, "http://relativewithemptybase.com/company").to_return(fixture_file("relative_links_with_empty_base.response"))
|
68
69
|
stub_request(:get, "http://theonion-no-description.com").to_return(fixture_file("theonion-no-description.com.response"))
|
69
70
|
stub_request(:get, "http://www.24-horas.mx/mexico-firma-acuerdo-bilateral-automotriz-con-argentina/").to_return(fixture_file("relative_og_image.response"))
|
70
71
|
stub_request(:get, "http://www.alazan.com").to_return(fixture_file("alazan.com.response"))
|
71
72
|
stub_request(:get, "http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups").to_return(fixture_file("guardian.co.uk.response"))
|
72
|
-
stub_request(:get, "http://www.iteh.at").to_return(fixture_file("iteh.at.response"))
|
73
|
-
stub_request(:get, "http://www.tea-tron.com/jbravo/blog/").to_return(fixture_file("tea-tron.com.response"))
|
74
73
|
stub_request(:get, "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/").to_return(fixture_file("theonion.com.response"))
|
75
74
|
stub_request(:get, "http://www.youtube.com/watch?v=iaGSSrp49uc").to_return(fixture_file("youtube.response"))
|
76
75
|
stub_request(:get, "http://www.youtube.com/watch?v=short_title").to_return(fixture_file("youtube_short_title.response"))
|
@@ -79,5 +78,6 @@ RSpec.configure do |config|
|
|
79
78
|
stub_request(:get, "https://twitter.com/markupvalidator").to_return(fixture_file("twitter_markupvalidator.response"))
|
80
79
|
stub_request(:get, "https://www.facebook.com/").to_return(fixture_file("https.facebook.com.response"))
|
81
80
|
stub_request(:get, "http://example.com/meta_tags_empty").to_return(fixture_file("meta_tags_empty.response"))
|
81
|
+
stub_request(:get, "http://feeds.example.com").to_return(fixture_file("feeds.response"))
|
82
82
|
end
|
83
83
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -16,70 +16,70 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.
|
19
|
+
version: 1.11.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.
|
26
|
+
version: 1.11.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: faraday
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 1.1.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 1.1.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: faraday_middleware
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.
|
47
|
+
version: 1.0.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.
|
54
|
+
version: 1.0.0
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: faraday-cookie_jar
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.0.
|
61
|
+
version: 0.0.7
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.0.
|
68
|
+
version: 0.0.7
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: faraday-http-cache
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 2.
|
75
|
+
version: 2.2.0
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 2.
|
82
|
+
version: 2.2.0
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: faraday-encoding
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -156,14 +156,14 @@ dependencies:
|
|
156
156
|
requirements:
|
157
157
|
- - "~>"
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version: 3.
|
159
|
+
version: 3.8.3
|
160
160
|
type: :development
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
164
|
- - "~>"
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version: 3.
|
166
|
+
version: 3.8.3
|
167
167
|
- !ruby/object:Gem::Dependency
|
168
168
|
name: awesome_print
|
169
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -184,46 +184,45 @@ dependencies:
|
|
184
184
|
requirements:
|
185
185
|
- - "~>"
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version: 13.0.
|
187
|
+
version: 13.0.1
|
188
188
|
type: :development
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
192
|
- - "~>"
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: 13.0.
|
194
|
+
version: 13.0.1
|
195
195
|
- !ruby/object:Gem::Dependency
|
196
196
|
name: pry
|
197
197
|
requirement: !ruby/object:Gem::Requirement
|
198
198
|
requirements:
|
199
199
|
- - "~>"
|
200
200
|
- !ruby/object:Gem::Version
|
201
|
-
version: 0.
|
201
|
+
version: 0.13.1
|
202
202
|
type: :development
|
203
203
|
prerelease: false
|
204
204
|
version_requirements: !ruby/object:Gem::Requirement
|
205
205
|
requirements:
|
206
206
|
- - "~>"
|
207
207
|
- !ruby/object:Gem::Version
|
208
|
-
version: 0.
|
208
|
+
version: 0.13.1
|
209
209
|
- !ruby/object:Gem::Dependency
|
210
210
|
name: rubocop
|
211
211
|
requirement: !ruby/object:Gem::Requirement
|
212
212
|
requirements:
|
213
213
|
- - "~>"
|
214
214
|
- !ruby/object:Gem::Version
|
215
|
-
version: 0.
|
215
|
+
version: 0.82.0
|
216
216
|
type: :development
|
217
217
|
prerelease: false
|
218
218
|
version_requirements: !ruby/object:Gem::Requirement
|
219
219
|
requirements:
|
220
220
|
- - "~>"
|
221
221
|
- !ruby/object:Gem::Version
|
222
|
-
version: 0.
|
222
|
+
version: 0.82.0
|
223
223
|
description: MetaInspector lets you scrape a web page and get its links, images, texts,
|
224
224
|
meta tags...
|
225
|
-
email:
|
226
|
-
- jaimeiniesta@gmail.com
|
225
|
+
email: jaimeiniesta@gmail.com
|
227
226
|
executables: []
|
228
227
|
extensions: []
|
229
228
|
extra_rdoc_files: []
|
@@ -278,6 +277,7 @@ files:
|
|
278
277
|
- spec/fixtures/encoding.response
|
279
278
|
- spec/fixtures/example.response
|
280
279
|
- spec/fixtures/facebook.com.response
|
280
|
+
- spec/fixtures/feeds.response
|
281
281
|
- spec/fixtures/guardian.co.uk.response
|
282
282
|
- spec/fixtures/head_links.response
|
283
283
|
- spec/fixtures/headings.response
|
@@ -286,7 +286,6 @@ files:
|
|
286
286
|
- spec/fixtures/invalid_byte_seq.response
|
287
287
|
- spec/fixtures/invalid_href.response
|
288
288
|
- spec/fixtures/invalid_utf8_byte_seq.response
|
289
|
-
- spec/fixtures/iteh.at.response
|
290
289
|
- spec/fixtures/largest_image_in_html.response
|
291
290
|
- spec/fixtures/largest_image_using_image_size.response
|
292
291
|
- spec/fixtures/malformed_href.response
|
@@ -303,8 +302,8 @@ files:
|
|
303
302
|
- spec/fixtures/protocol_relative.response
|
304
303
|
- spec/fixtures/relative_links.response
|
305
304
|
- spec/fixtures/relative_links_with_base.response
|
305
|
+
- spec/fixtures/relative_links_with_empty_base.response
|
306
306
|
- spec/fixtures/relative_og_image.response
|
307
|
-
- spec/fixtures/tea-tron.com.response
|
308
307
|
- spec/fixtures/theonion-no-description.com.response
|
309
308
|
- spec/fixtures/theonion.com.response
|
310
309
|
- spec/fixtures/title_best_choice.response
|
@@ -330,11 +329,11 @@ files:
|
|
330
329
|
- spec/request_spec.rb
|
331
330
|
- spec/spec_helper.rb
|
332
331
|
- spec/url_spec.rb
|
333
|
-
homepage: https://github.com/
|
332
|
+
homepage: https://github.com/metainspector/metainspector
|
334
333
|
licenses:
|
335
334
|
- MIT
|
336
335
|
metadata: {}
|
337
|
-
post_install_message:
|
336
|
+
post_install_message:
|
338
337
|
rdoc_options: []
|
339
338
|
require_paths:
|
340
339
|
- lib
|
@@ -349,8 +348,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
349
348
|
- !ruby/object:Gem::Version
|
350
349
|
version: '0'
|
351
350
|
requirements: []
|
352
|
-
rubygems_version: 3.
|
353
|
-
signing_key:
|
351
|
+
rubygems_version: 3.1.2
|
352
|
+
signing_key:
|
354
353
|
specification_version: 4
|
355
354
|
summary: MetaInspector is a ruby gem for web scraping purposes, that returns metadata
|
356
355
|
from a given URL
|