metainspector 5.1.1 → 5.1.2
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 217018e977493f4b34675983860e75428f8f69e0
|
4
|
+
data.tar.gz: 6a4dd2cd43014ebbd07efa0883adcb0d3aa2df92
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 00e976815e2076b14c8ca8d551bf1f13709b3db84ca670977eb10077fe5e1e17c4bb09ae67e1d388f434d6c39e371eeb514b8f3fb1a4c07c7f5d36e9c75d546b
|
7
|
+
data.tar.gz: 0b6dddcb366166ffee6b056f5ad343b2b0cc2ca30077fa861fb652c8e0a39f006a00149f44465eac66952768bf8b9b319c2d8768885018f3b39a2d776d0ff4ce
|
@@ -32,8 +32,8 @@ module MetaInspector
|
|
32
32
|
private
|
33
33
|
|
34
34
|
def parsed_feed(format)
|
35
|
-
feed = parsed.search("//link[@type='application/#{format}+xml']").first
|
36
|
-
feed ? URL.absolutify(feed
|
35
|
+
feed = parsed.search("//link[@type='application/#{format}+xml']").find{|link| link.attributes["href"] }
|
36
|
+
feed ? URL.absolutify(feed['href'], base_url) : nil
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Fri, 18 Nov 2011 21:46:46 GMT
|
4
|
+
Content-Type: text/html
|
5
|
+
Connection: keep-alive
|
6
|
+
Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
|
7
|
+
Content-Length: 4987
|
8
|
+
X-Varnish: 2000423390
|
9
|
+
Age: 0
|
10
|
+
Via: 1.1 varnish
|
11
|
+
|
12
|
+
<html>
|
13
|
+
<head>
|
14
|
+
<title>An example page</title>
|
15
|
+
<link rel="alternate" type="application/rss+xml" title="Media RSS feed" />
|
16
|
+
<link href="http://www.guardian.co.uk/media/techcrunch/rss" rel="alternate" type="application/rss+xml" title="TechCrunch RSS feed" />
|
17
|
+
<link
|
18
|
+
rel="canonical"
|
19
|
+
href="http://example.com/canonical-from-head"
|
20
|
+
/>
|
21
|
+
<link rel="stylesheet" href="/stylesheets/screen.css">
|
22
|
+
<link rel="stylesheet" href="//example2.com/stylesheets/screen.css">
|
23
|
+
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" />
|
24
|
+
<link rel="shorturl" href="http://gu.com/p/32v5a" />
|
25
|
+
<link
|
26
|
+
rel="stylesheet"
|
27
|
+
type="text/css"
|
28
|
+
href="http://foo/print.css"
|
29
|
+
media="print"
|
30
|
+
class="contrast"
|
31
|
+
/>
|
32
|
+
</head>
|
33
|
+
<body>
|
34
|
+
<h1>Hello World</h1>
|
35
|
+
</body>
|
36
|
+
</html>
|
@@ -37,6 +37,12 @@ describe MetaInspector do
|
|
37
37
|
])
|
38
38
|
end
|
39
39
|
|
40
|
+
context "on page with some broken feed links" do
|
41
|
+
let(:page){ MetaInspector.new('http://example.com/broken_head_links') }
|
42
|
+
it "tries to find correct one" do
|
43
|
+
expect(page.feed).to eq("http://www.guardian.co.uk/media/techcrunch/rss")
|
44
|
+
end
|
45
|
+
end
|
40
46
|
end
|
41
47
|
|
42
48
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -44,6 +44,7 @@ FakeWeb.register_uri(:get, "http://www.24-horas.mx/mexico-firma-acuerdo-bilatera
|
|
44
44
|
#Used to test canonical URLs in head
|
45
45
|
FakeWeb.register_uri(:get, "http://example.com/head_links", :response => fixture_file("head_links.response"))
|
46
46
|
FakeWeb.register_uri(:get, "https://example.com/head_links", :response => fixture_file("head_links.response"))
|
47
|
+
FakeWeb.register_uri(:get, "http://example.com/broken_head_links", :response => fixture_file("broken_head_links.response"))
|
47
48
|
|
48
49
|
# Used to test best_title logic
|
49
50
|
FakeWeb.register_uri(:get, "http://example.com/title_in_head", :response => fixture_file("title_in_head.response"))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.1.1
|
4
|
+
version: 5.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-04-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -305,6 +305,7 @@ files:
|
|
305
305
|
- spec/fixtures/404.response
|
306
306
|
- spec/fixtures/alazan.com.response
|
307
307
|
- spec/fixtures/alazan_websolution.response
|
308
|
+
- spec/fixtures/broken_head_links.response
|
308
309
|
- spec/fixtures/charset_000.response
|
309
310
|
- spec/fixtures/charset_001.response
|
310
311
|
- spec/fixtures/charset_002.response
|
@@ -380,7 +381,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
380
381
|
version: '0'
|
381
382
|
requirements: []
|
382
383
|
rubyforge_project:
|
383
|
-
rubygems_version: 2.
|
384
|
+
rubygems_version: 2.4.8
|
384
385
|
signing_key:
|
385
386
|
specification_version: 4
|
386
387
|
summary: MetaInspector is a ruby gem for web scraping purposes, that returns metadata
|