metainspector 2.3.0 → 2.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/meta_inspector/document.rb +2 -2
- data/lib/meta_inspector/parser.rb +7 -4
- data/lib/meta_inspector/request.rb +3 -4
- data/lib/meta_inspector/url.rb +3 -4
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +1 -0
- data/spec/fixtures/pagerankalert-shortcut-and-icon.com.response +187 -0
- data/spec/fixtures/pagerankalert-shortcut.com.response +187 -0
- data/spec/fixtures/pagerankalert-touch-icon.com.response +188 -0
- data/spec/parser_spec.rb +16 -1
- data/spec/request_spec.rb +1 -0
- data/spec/spec_helper.rb +10 -0
- metadata +19 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f703091a8a43619fd8f6ecfdb60de18a6325a41c
|
4
|
+
data.tar.gz: 629dff9035a4c8c5c4aa6b27d73a3e689a0873a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 361be3755dbaab2ac880d8f5414c613ae1d3fde35899c6256710c8e94733105da4c13a007cfe002760775ef9b39961dc571c37470ecdba0ade89690aeac33de3
|
7
|
+
data.tar.gz: 79eb048b137b55088b2e9ac587dfcd31300fa121073f79fcd8a5598e1384670f54fcc84cf4951f9d111f7e468219ec6fa065917146ebe622f42826ffadd7562f
|
data/lib/meta_inspector/parser.rb
CHANGED
@@ -37,9 +37,8 @@ module MetaInspector
|
|
37
37
|
# Returns the whole parsed document
|
38
38
|
def parsed
|
39
39
|
@parsed ||= Nokogiri::HTML(@document.to_s)
|
40
|
-
|
41
|
-
|
42
|
-
@exception_log << e
|
40
|
+
rescue Exception => e
|
41
|
+
@exception_log << e
|
43
42
|
end
|
44
43
|
|
45
44
|
# Returns the parsed document title, from the content of the <title> tag.
|
@@ -50,7 +49,11 @@ module MetaInspector
|
|
50
49
|
|
51
50
|
# Return favicon url if exist
|
52
51
|
def favicon
|
53
|
-
|
52
|
+
query = '//link[@rel="icon" or contains(@rel, "shortcut")]'
|
53
|
+
value = parsed.xpath(query)[0].attributes['href'].value
|
54
|
+
@favicon ||= URL.absolutify(value, base_url)
|
55
|
+
rescue
|
56
|
+
nil
|
54
57
|
end
|
55
58
|
|
56
59
|
# A description getter that first checks for a meta description and if not present will
|
data/lib/meta_inspector/request.rb
CHANGED
@@ -37,10 +37,9 @@ module MetaInspector
|
|
37
37
|
|
38
38
|
def response
|
39
39
|
Timeout::timeout(@timeout) { @response ||= fetch }
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
nil
|
40
|
+
rescue TimeoutError, SocketError, RuntimeError => e
|
41
|
+
@exception_log << e
|
42
|
+
nil
|
44
43
|
end
|
45
44
|
|
46
45
|
def fetch
|
data/lib/meta_inspector/url.rb
CHANGED
@@ -66,10 +66,9 @@ module MetaInspector
|
|
66
66
|
|
67
67
|
def parsed(url)
|
68
68
|
Addressable::URI.parse(url)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
nil
|
69
|
+
rescue Addressable::URI::InvalidURIError => e
|
70
|
+
@exception_log << e
|
71
|
+
nil
|
73
72
|
end
|
74
73
|
end
|
75
74
|
end
|
data/meta_inspector.gemspec
CHANGED
data/spec/fixtures/pagerankalert-shortcut-and-icon.com.response
ADDED
@@ -0,0 +1,187 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Mon, 30 May 2011 09:45:42 GMT
|
4
|
+
Content-Type: text/html; charset=utf-8
|
5
|
+
Connection: keep-alive
|
6
|
+
ETag: "d0534cf7ad7d7a7fb737fe4ad99b0fd1"
|
7
|
+
X-UA-Compatible: IE=Edge,chrome=1
|
8
|
+
X-Runtime: 0.031274
|
9
|
+
Set-Cookie: _session_id=33575f7694b4492af4c4e282d62a7127; path=/; HttpOnly
|
10
|
+
Cache-Control: max-age=0, private, must-revalidate
|
11
|
+
Content-Length: 6690
|
12
|
+
X-Varnish: 2167295052
|
13
|
+
Age: 0
|
14
|
+
Via: 1.1 varnish
|
15
|
+
|
16
|
+
<!DOCTYPE html>
|
17
|
+
<html lang="en">
|
18
|
+
<head>
|
19
|
+
<meta charset=utf-8>
|
20
|
+
<link rel="alternate" type="application/rss+xml" title="PageRankAlert.com blog" href="http://feeds.feedburner.com/PageRankAlert" />
|
21
|
+
<title>PageRankAlert.com :: Track your PageRank changes & receive alerts</title>
|
22
|
+
<link rel="shortcut icon" href="/src/favicon.ico">
|
23
|
+
<meta name="description" content="Track your PageRank(TM) changes and receive alerts by email" />
|
24
|
+
<meta name="keywords" content="pagerank, seo, optimization, google" />
|
25
|
+
<meta name="robots" content="all,follow" />
|
26
|
+
<link rel="stylesheet" href="/stylesheets/screen.css">
|
27
|
+
<link href='http://fonts.googleapis.com/css?family=Yanone+Kaffeesatz&subset=latin' rel='stylesheet' type='text/css'>
|
28
|
+
|
29
|
+
<script src="/javascripts/jquery.min.js?1305794559" type="text/javascript"></script>
|
30
|
+
<script src="/javascripts/rails.js?1305794559" type="text/javascript"></script>
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
<meta name="csrf-param" content="authenticity_token"/>
|
35
|
+
<meta name="csrf-token" content="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="/>
|
36
|
+
</head>
|
37
|
+
|
38
|
+
<body>
|
39
|
+
<script type="text/javascript">
|
40
|
+
(function(){
|
41
|
+
var bsa = document.createElement('script');
|
42
|
+
bsa.type = 'text/javascript';
|
43
|
+
bsa.async = true;
|
44
|
+
bsa.src = '//s3.buysellads.com/ac/bsa.js';
|
45
|
+
(document.getElementsByTagName('head')[0]||document.getElementsByTagName('body')[0]).appendChild(bsa);
|
46
|
+
})();
|
47
|
+
</script> <div id="flash_notice" class="flashmessage" style='display:none;'></div>
|
48
|
+
|
49
|
+
<div id="flash_error" class="flashmessage" style='display:none;'></div>
|
50
|
+
|
51
|
+
<div id="flash_alert" class="flashmessage" style='display:none;'></div>
|
52
|
+
|
53
|
+
<div id="banner_top" style="background-color:#111; color:#fff;text-align:center;font-size:1.4em;font-weight:bold;padding:0.6em;">
|
54
|
+
<div style="width:980px; margin:0 auto;">
|
55
|
+
<div id="bsap_1260794" style="margin-left: 125px;" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
56
|
+
</div>
|
57
|
+
</div>
|
58
|
+
|
59
|
+
<div id="page">
|
60
|
+
<div id="header">
|
61
|
+
<div class="section">
|
62
|
+
<h1>
|
63
|
+
<a href="/" id="logo" tabindex="1"><img alt="PageRankAlert" src="/images/pagerank_alert.png?1305794559" /></a>
|
64
|
+
</h1>
|
65
|
+
</div>
|
66
|
+
|
67
|
+
<div class="section language">
|
68
|
+
<span id='eng'>eng</span> |
|
69
|
+
<a href="/es?language=es"><span id='esp'>esp</span></a>
|
70
|
+
</div>
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
<div class="section login">
|
77
|
+
<a href="/users/sign_up">Register</a> or <a href="/users/sign_in">Sign in</a>
|
78
|
+
</div>
|
79
|
+
|
80
|
+
<div class="aside">
|
81
|
+
<ul class="nav">
|
82
|
+
<li><a href="/" class="selectedss">home</a></li>
|
83
|
+
<li><a href="mailto:pagerankalert@gmail.com">contact</a></li>
|
84
|
+
<li><a href="http://pagerankalert.posterous.com" target="_blank">blog</a></li>
|
85
|
+
<li><a href="http://twitter.com/pagerankalert" target="_blank">twitter</a></li>
|
86
|
+
<li style="margin-top:10px; margin-left:40px;"><a href="http://twitter.com/share" style="display:block;" class="twitter-share-button" data-count="horizontal" data-via="pagerankalert" data-lang="en">Tweet</a><script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script></li>
|
87
|
+
</ul>
|
88
|
+
</div>
|
89
|
+
</div>
|
90
|
+
|
91
|
+
<div id="main">
|
92
|
+
<div id="content1">
|
93
|
+
<h2 class="begin">
|
94
|
+
Track your PageRank changes
|
95
|
+
</h2>
|
96
|
+
<h3 class="begin">WHAT'S YOUR PAGERANK?</h3>
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
<div id="banner_up" class="tac" style="margin-top:2em; margin-bottom:-2em;">
|
101
|
+
<div id="bsap_1258008" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1" style="width: 728px; padding-left:115px;"></div>
|
102
|
+
</div>
|
103
|
+
|
104
|
+
<form accept-charset="UTF-8" action="/pages" class="search_form general_form" id="pagerank" method="post"><div style="margin:0;padding:0;display:inline"><input name="utf8" type="hidden" value="✓" /><input name="authenticity_token" type="hidden" value="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE=" /></div>
|
105
|
+
<label for="myurl">Search your page now:</label>
|
106
|
+
<input class="text" id="page_url" maxlength="250" name="page[url]" size="60" type="text" value="http://" />
|
107
|
+
|
108
|
+
<p class="explanation">
|
109
|
+
PageRank is different for the same URL with or without <em>www</em>
|
110
|
+
</p>
|
111
|
+
<p class="tac">
|
112
|
+
<button type="submit">Check PageRank »</button>
|
113
|
+
</p>
|
114
|
+
</form>
|
115
|
+
<div id="features">
|
116
|
+
<div id="a" class="section">
|
117
|
+
<h4><strong>Build your own lists</strong></h4>
|
118
|
+
<p>Have you got a lot of pages that you'd like to track easily from one single place?<br /><br />You can build your own <strong>PageRank watchlist</strong> with an unlimited number of URLs.</p>
|
119
|
+
</div>
|
120
|
+
|
121
|
+
<div id="b" class="section">
|
122
|
+
<h4><strong>Get e-mail alerts</strong></h4>
|
123
|
+
<p>Do you want to be notified by email when your <strong>PageRank</strong> changes?<br /><br />You'll get an <strong>email</strong> when we detect a change in any of your tracked sites.</p>
|
124
|
+
</div>
|
125
|
+
|
126
|
+
<div id="c" class="section last">
|
127
|
+
<h4><strong>Track your history</strong></h4>
|
128
|
+
<p>Do you want to know the details of your <strong>PageRank evolution</strong>?<br /><br />We will show you a graphic with all the registered PageRank changes for each page.</p>
|
129
|
+
</div>
|
130
|
+
</div>
|
131
|
+
|
132
|
+
<div id="banner_down" class="tac">
|
133
|
+
<div id="bsap_1256852" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
134
|
+
</div>
|
135
|
+
|
136
|
+
</div>
|
137
|
+
</div>
|
138
|
+
|
139
|
+
<div id="footer">
|
140
|
+
<div id="credits" class="section">
|
141
|
+
<small>
|
142
|
+
<p>
|
143
|
+
© 2006-2011 PageRankAlert.com. This site is not associated to Google. PageRank is a registered trademark of Google Inc.
|
144
|
+
</p>
|
145
|
+
</small>
|
146
|
+
</div>
|
147
|
+
</div>
|
148
|
+
</div>
|
149
|
+
|
150
|
+
<script type="text/javascript">
|
151
|
+
var uservoiceOptions = {
|
152
|
+
key: 'pagerankalert',
|
153
|
+
host: 'pagerankalert.uservoice.com',
|
154
|
+
forum: '6999',
|
155
|
+
showTab: true,
|
156
|
+
|
157
|
+
alignment: 'left',
|
158
|
+
background_color:'#f00',
|
159
|
+
text_color: 'white',
|
160
|
+
hover_color: '#06C',
|
161
|
+
lang: 'en'
|
162
|
+
};
|
163
|
+
|
164
|
+
function _loadUserVoice() {
|
165
|
+
var s = document.createElement('script');
|
166
|
+
s.setAttribute('type', 'text/javascript');
|
167
|
+
s.setAttribute('src', ("https:" == document.location.protocol ? "https://" : "http://") + "cdn.uservoice.com/javascripts/widgets/tab.js");
|
168
|
+
document.getElementsByTagName('head')[0].appendChild(s);
|
169
|
+
}
|
170
|
+
_loadSuper = window.onload;
|
171
|
+
window.onload = (typeof window.onload != 'function') ? _loadUserVoice : function() { _loadSuper(); _loadUserVoice(); };
|
172
|
+
</script>
|
173
|
+
<script type="text/javascript">
|
174
|
+
|
175
|
+
var _gaq = _gaq || [];
|
176
|
+
_gaq.push(['_setAccount', 'UA-122379-8']);
|
177
|
+
_gaq.push(['_trackPageview']);
|
178
|
+
|
179
|
+
(function() {
|
180
|
+
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
181
|
+
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
182
|
+
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
183
|
+
})();
|
184
|
+
|
185
|
+
</script>
|
186
|
+
</body>
|
187
|
+
</html>
|
data/spec/fixtures/pagerankalert-shortcut.com.response
ADDED
@@ -0,0 +1,187 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Mon, 30 May 2011 09:45:42 GMT
|
4
|
+
Content-Type: text/html; charset=utf-8
|
5
|
+
Connection: keep-alive
|
6
|
+
ETag: "d0534cf7ad7d7a7fb737fe4ad99b0fd1"
|
7
|
+
X-UA-Compatible: IE=Edge,chrome=1
|
8
|
+
X-Runtime: 0.031274
|
9
|
+
Set-Cookie: _session_id=33575f7694b4492af4c4e282d62a7127; path=/; HttpOnly
|
10
|
+
Cache-Control: max-age=0, private, must-revalidate
|
11
|
+
Content-Length: 6690
|
12
|
+
X-Varnish: 2167295052
|
13
|
+
Age: 0
|
14
|
+
Via: 1.1 varnish
|
15
|
+
|
16
|
+
<!DOCTYPE html>
|
17
|
+
<html lang="en">
|
18
|
+
<head>
|
19
|
+
<meta charset=utf-8>
|
20
|
+
<link rel="alternate" type="application/rss+xml" title="PageRankAlert.com blog" href="http://feeds.feedburner.com/PageRankAlert" />
|
21
|
+
<title>PageRankAlert.com :: Track your PageRank changes & receive alerts</title>
|
22
|
+
<link rel="shortcut" href="/src/favicon.ico">
|
23
|
+
<meta name="description" content="Track your PageRank(TM) changes and receive alerts by email" />
|
24
|
+
<meta name="keywords" content="pagerank, seo, optimization, google" />
|
25
|
+
<meta name="robots" content="all,follow" />
|
26
|
+
<link rel="stylesheet" href="/stylesheets/screen.css">
|
27
|
+
<link href='http://fonts.googleapis.com/css?family=Yanone+Kaffeesatz&subset=latin' rel='stylesheet' type='text/css'>
|
28
|
+
|
29
|
+
<script src="/javascripts/jquery.min.js?1305794559" type="text/javascript"></script>
|
30
|
+
<script src="/javascripts/rails.js?1305794559" type="text/javascript"></script>
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
<meta name="csrf-param" content="authenticity_token"/>
|
35
|
+
<meta name="csrf-token" content="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="/>
|
36
|
+
</head>
|
37
|
+
|
38
|
+
<body>
|
39
|
+
<script type="text/javascript">
|
40
|
+
(function(){
|
41
|
+
var bsa = document.createElement('script');
|
42
|
+
bsa.type = 'text/javascript';
|
43
|
+
bsa.async = true;
|
44
|
+
bsa.src = '//s3.buysellads.com/ac/bsa.js';
|
45
|
+
(document.getElementsByTagName('head')[0]||document.getElementsByTagName('body')[0]).appendChild(bsa);
|
46
|
+
})();
|
47
|
+
</script> <div id="flash_notice" class="flashmessage" style='display:none;'></div>
|
48
|
+
|
49
|
+
<div id="flash_error" class="flashmessage" style='display:none;'></div>
|
50
|
+
|
51
|
+
<div id="flash_alert" class="flashmessage" style='display:none;'></div>
|
52
|
+
|
53
|
+
<div id="banner_top" style="background-color:#111; color:#fff;text-align:center;font-size:1.4em;font-weight:bold;padding:0.6em;">
|
54
|
+
<div style="width:980px; margin:0 auto;">
|
55
|
+
<div id="bsap_1260794" style="margin-left: 125px;" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
56
|
+
</div>
|
57
|
+
</div>
|
58
|
+
|
59
|
+
<div id="page">
|
60
|
+
<div id="header">
|
61
|
+
<div class="section">
|
62
|
+
<h1>
|
63
|
+
<a href="/" id="logo" tabindex="1"><img alt="PageRankAlert" src="/images/pagerank_alert.png?1305794559" /></a>
|
64
|
+
</h1>
|
65
|
+
</div>
|
66
|
+
|
67
|
+
<div class="section language">
|
68
|
+
<span id='eng'>eng</span> |
|
69
|
+
<a href="/es?language=es"><span id='esp'>esp</span></a>
|
70
|
+
</div>
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
<div class="section login">
|
77
|
+
<a href="/users/sign_up">Register</a> or <a href="/users/sign_in">Sign in</a>
|
78
|
+
</div>
|
79
|
+
|
80
|
+
<div class="aside">
|
81
|
+
<ul class="nav">
|
82
|
+
<li><a href="/" class="selectedss">home</a></li>
|
83
|
+
<li><a href="mailto:pagerankalert@gmail.com">contact</a></li>
|
84
|
+
<li><a href="http://pagerankalert.posterous.com" target="_blank">blog</a></li>
|
85
|
+
<li><a href="http://twitter.com/pagerankalert" target="_blank">twitter</a></li>
|
86
|
+
<li style="margin-top:10px; margin-left:40px;"><a href="http://twitter.com/share" style="display:block;" class="twitter-share-button" data-count="horizontal" data-via="pagerankalert" data-lang="en">Tweet</a><script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script></li>
|
87
|
+
</ul>
|
88
|
+
</div>
|
89
|
+
</div>
|
90
|
+
|
91
|
+
<div id="main">
|
92
|
+
<div id="content1">
|
93
|
+
<h2 class="begin">
|
94
|
+
Track your PageRank changes
|
95
|
+
</h2>
|
96
|
+
<h3 class="begin">WHAT'S YOUR PAGERANK?</h3>
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
<div id="banner_up" class="tac" style="margin-top:2em; margin-bottom:-2em;">
|
101
|
+
<div id="bsap_1258008" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1" style="width: 728px; padding-left:115px;"></div>
|
102
|
+
</div>
|
103
|
+
|
104
|
+
<form accept-charset="UTF-8" action="/pages" class="search_form general_form" id="pagerank" method="post"><div style="margin:0;padding:0;display:inline"><input name="utf8" type="hidden" value="✓" /><input name="authenticity_token" type="hidden" value="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE=" /></div>
|
105
|
+
<label for="myurl">Search your page now:</label>
|
106
|
+
<input class="text" id="page_url" maxlength="250" name="page[url]" size="60" type="text" value="http://" />
|
107
|
+
|
108
|
+
<p class="explanation">
|
109
|
+
PageRank is different for the same URL with or without <em>www</em>
|
110
|
+
</p>
|
111
|
+
<p class="tac">
|
112
|
+
<button type="submit">Check PageRank »</button>
|
113
|
+
</p>
|
114
|
+
</form>
|
115
|
+
<div id="features">
|
116
|
+
<div id="a" class="section">
|
117
|
+
<h4><strong>Build your own lists</strong></h4>
|
118
|
+
<p>Have you got a lot of pages that you'd like to track easily from one single place?<br /><br />You can build your own <strong>PageRank watchlist</strong> with an unlimited number of URLs.</p>
|
119
|
+
</div>
|
120
|
+
|
121
|
+
<div id="b" class="section">
|
122
|
+
<h4><strong>Get e-mail alerts</strong></h4>
|
123
|
+
<p>Do you want to be notified by email when your <strong>PageRank</strong> changes?<br /><br />You'll get an <strong>email</strong> when we detect a change in any of your tracked sites.</p>
|
124
|
+
</div>
|
125
|
+
|
126
|
+
<div id="c" class="section last">
|
127
|
+
<h4><strong>Track your history</strong></h4>
|
128
|
+
<p>Do you want to know the details of your <strong>PageRank evolution</strong>?<br /><br />We will show you a graphic with all the registered PageRank changes for each page.</p>
|
129
|
+
</div>
|
130
|
+
</div>
|
131
|
+
|
132
|
+
<div id="banner_down" class="tac">
|
133
|
+
<div id="bsap_1256852" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
134
|
+
</div>
|
135
|
+
|
136
|
+
</div>
|
137
|
+
</div>
|
138
|
+
|
139
|
+
<div id="footer">
|
140
|
+
<div id="credits" class="section">
|
141
|
+
<small>
|
142
|
+
<p>
|
143
|
+
© 2006-2011 PageRankAlert.com. This site is not associated to Google. PageRank is a registered trademark of Google Inc.
|
144
|
+
</p>
|
145
|
+
</small>
|
146
|
+
</div>
|
147
|
+
</div>
|
148
|
+
</div>
|
149
|
+
|
150
|
+
<script type="text/javascript">
|
151
|
+
var uservoiceOptions = {
|
152
|
+
key: 'pagerankalert',
|
153
|
+
host: 'pagerankalert.uservoice.com',
|
154
|
+
forum: '6999',
|
155
|
+
showTab: true,
|
156
|
+
|
157
|
+
alignment: 'left',
|
158
|
+
background_color:'#f00',
|
159
|
+
text_color: 'white',
|
160
|
+
hover_color: '#06C',
|
161
|
+
lang: 'en'
|
162
|
+
};
|
163
|
+
|
164
|
+
function _loadUserVoice() {
|
165
|
+
var s = document.createElement('script');
|
166
|
+
s.setAttribute('type', 'text/javascript');
|
167
|
+
s.setAttribute('src', ("https:" == document.location.protocol ? "https://" : "http://") + "cdn.uservoice.com/javascripts/widgets/tab.js");
|
168
|
+
document.getElementsByTagName('head')[0].appendChild(s);
|
169
|
+
}
|
170
|
+
_loadSuper = window.onload;
|
171
|
+
window.onload = (typeof window.onload != 'function') ? _loadUserVoice : function() { _loadSuper(); _loadUserVoice(); };
|
172
|
+
</script>
|
173
|
+
<script type="text/javascript">
|
174
|
+
|
175
|
+
var _gaq = _gaq || [];
|
176
|
+
_gaq.push(['_setAccount', 'UA-122379-8']);
|
177
|
+
_gaq.push(['_trackPageview']);
|
178
|
+
|
179
|
+
(function() {
|
180
|
+
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
181
|
+
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
182
|
+
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
183
|
+
})();
|
184
|
+
|
185
|
+
</script>
|
186
|
+
</body>
|
187
|
+
</html>
|
data/spec/fixtures/pagerankalert-touch-icon.com.response
ADDED
@@ -0,0 +1,188 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Mon, 30 May 2011 09:45:42 GMT
|
4
|
+
Content-Type: text/html; charset=utf-8
|
5
|
+
Connection: keep-alive
|
6
|
+
ETag: "d0534cf7ad7d7a7fb737fe4ad99b0fd1"
|
7
|
+
X-UA-Compatible: IE=Edge,chrome=1
|
8
|
+
X-Runtime: 0.031274
|
9
|
+
Set-Cookie: _session_id=33575f7694b4492af4c4e282d62a7127; path=/; HttpOnly
|
10
|
+
Cache-Control: max-age=0, private, must-revalidate
|
11
|
+
Content-Length: 6690
|
12
|
+
X-Varnish: 2167295052
|
13
|
+
Age: 0
|
14
|
+
Via: 1.1 varnish
|
15
|
+
|
16
|
+
<!DOCTYPE html>
|
17
|
+
<html lang="en">
|
18
|
+
<head>
|
19
|
+
<meta charset=utf-8>
|
20
|
+
<link rel="alternate" type="application/rss+xml" title="PageRankAlert.com blog" href="http://feeds.feedburner.com/PageRankAlert" />
|
21
|
+
<title>PageRankAlert.com :: Track your PageRank changes & receive alerts</title>
|
22
|
+
<link href="img/apple-touch-icon-58x58.png" rel="apple-touch-icon" sizes="58x58" />
|
23
|
+
<link rel="icon" href="/src/favicon.ico">
|
24
|
+
<meta name="description" content="Track your PageRank(TM) changes and receive alerts by email" />
|
25
|
+
<meta name="keywords" content="pagerank, seo, optimization, google" />
|
26
|
+
<meta name="robots" content="all,follow" />
|
27
|
+
<link rel="stylesheet" href="/stylesheets/screen.css">
|
28
|
+
<link href='http://fonts.googleapis.com/css?family=Yanone+Kaffeesatz&subset=latin' rel='stylesheet' type='text/css'>
|
29
|
+
|
30
|
+
<script src="/javascripts/jquery.min.js?1305794559" type="text/javascript"></script>
|
31
|
+
<script src="/javascripts/rails.js?1305794559" type="text/javascript"></script>
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
<meta name="csrf-param" content="authenticity_token"/>
|
36
|
+
<meta name="csrf-token" content="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="/>
|
37
|
+
</head>
|
38
|
+
|
39
|
+
<body>
|
40
|
+
<script type="text/javascript">
|
41
|
+
(function(){
|
42
|
+
var bsa = document.createElement('script');
|
43
|
+
bsa.type = 'text/javascript';
|
44
|
+
bsa.async = true;
|
45
|
+
bsa.src = '//s3.buysellads.com/ac/bsa.js';
|
46
|
+
(document.getElementsByTagName('head')[0]||document.getElementsByTagName('body')[0]).appendChild(bsa);
|
47
|
+
})();
|
48
|
+
</script> <div id="flash_notice" class="flashmessage" style='display:none;'></div>
|
49
|
+
|
50
|
+
<div id="flash_error" class="flashmessage" style='display:none;'></div>
|
51
|
+
|
52
|
+
<div id="flash_alert" class="flashmessage" style='display:none;'></div>
|
53
|
+
|
54
|
+
<div id="banner_top" style="background-color:#111; color:#fff;text-align:center;font-size:1.4em;font-weight:bold;padding:0.6em;">
|
55
|
+
<div style="width:980px; margin:0 auto;">
|
56
|
+
<div id="bsap_1260794" style="margin-left: 125px;" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
57
|
+
</div>
|
58
|
+
</div>
|
59
|
+
|
60
|
+
<div id="page">
|
61
|
+
<div id="header">
|
62
|
+
<div class="section">
|
63
|
+
<h1>
|
64
|
+
<a href="/" id="logo" tabindex="1"><img alt="PageRankAlert" src="/images/pagerank_alert.png?1305794559" /></a>
|
65
|
+
</h1>
|
66
|
+
</div>
|
67
|
+
|
68
|
+
<div class="section language">
|
69
|
+
<span id='eng'>eng</span> |
|
70
|
+
<a href="/es?language=es"><span id='esp'>esp</span></a>
|
71
|
+
</div>
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
<div class="section login">
|
78
|
+
<a href="/users/sign_up">Register</a> or <a href="/users/sign_in">Sign in</a>
|
79
|
+
</div>
|
80
|
+
|
81
|
+
<div class="aside">
|
82
|
+
<ul class="nav">
|
83
|
+
<li><a href="/" class="selectedss">home</a></li>
|
84
|
+
<li><a href="mailto:pagerankalert@gmail.com">contact</a></li>
|
85
|
+
<li><a href="http://pagerankalert.posterous.com" target="_blank">blog</a></li>
|
86
|
+
<li><a href="http://twitter.com/pagerankalert" target="_blank">twitter</a></li>
|
87
|
+
<li style="margin-top:10px; margin-left:40px;"><a href="http://twitter.com/share" style="display:block;" class="twitter-share-button" data-count="horizontal" data-via="pagerankalert" data-lang="en">Tweet</a><script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script></li>
|
88
|
+
</ul>
|
89
|
+
</div>
|
90
|
+
</div>
|
91
|
+
|
92
|
+
<div id="main">
|
93
|
+
<div id="content1">
|
94
|
+
<h2 class="begin">
|
95
|
+
Track your PageRank changes
|
96
|
+
</h2>
|
97
|
+
<h3 class="begin">WHAT'S YOUR PAGERANK?</h3>
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
<div id="banner_up" class="tac" style="margin-top:2em; margin-bottom:-2em;">
|
102
|
+
<div id="bsap_1258008" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1" style="width: 728px; padding-left:115px;"></div>
|
103
|
+
</div>
|
104
|
+
|
105
|
+
<form accept-charset="UTF-8" action="/pages" class="search_form general_form" id="pagerank" method="post"><div style="margin:0;padding:0;display:inline"><input name="utf8" type="hidden" value="✓" /><input name="authenticity_token" type="hidden" value="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE=" /></div>
|
106
|
+
<label for="myurl">Search your page now:</label>
|
107
|
+
<input class="text" id="page_url" maxlength="250" name="page[url]" size="60" type="text" value="http://" />
|
108
|
+
|
109
|
+
<p class="explanation">
|
110
|
+
PageRank is different for the same URL with or without <em>www</em>
|
111
|
+
</p>
|
112
|
+
<p class="tac">
|
113
|
+
<button type="submit">Check PageRank »</button>
|
114
|
+
</p>
|
115
|
+
</form>
|
116
|
+
<div id="features">
|
117
|
+
<div id="a" class="section">
|
118
|
+
<h4><strong>Build your own lists</strong></h4>
|
119
|
+
<p>Have you got a lot of pages that you'd like to track easily from one single place?<br /><br />You can build your own <strong>PageRank watchlist</strong> with an unlimited number of URLs.</p>
|
120
|
+
</div>
|
121
|
+
|
122
|
+
<div id="b" class="section">
|
123
|
+
<h4><strong>Get e-mail alerts</strong></h4>
|
124
|
+
<p>Do you want to be notified by email when your <strong>PageRank</strong> changes?<br /><br />You'll get an <strong>email</strong> when we detect a change in any of your tracked sites.</p>
|
125
|
+
</div>
|
126
|
+
|
127
|
+
<div id="c" class="section last">
|
128
|
+
<h4><strong>Track your history</strong></h4>
|
129
|
+
<p>Do you want to know the details of your <strong>PageRank evolution</strong>?<br /><br />We will show you a graphic with all the registered PageRank changes for each page.</p>
|
130
|
+
</div>
|
131
|
+
</div>
|
132
|
+
|
133
|
+
<div id="banner_down" class="tac">
|
134
|
+
<div id="bsap_1256852" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
135
|
+
</div>
|
136
|
+
|
137
|
+
</div>
|
138
|
+
</div>
|
139
|
+
|
140
|
+
<div id="footer">
|
141
|
+
<div id="credits" class="section">
|
142
|
+
<small>
|
143
|
+
<p>
|
144
|
+
© 2006-2011 PageRankAlert.com. This site is not associated to Google. PageRank is a registered trademark of Google Inc.
|
145
|
+
</p>
|
146
|
+
</small>
|
147
|
+
</div>
|
148
|
+
</div>
|
149
|
+
</div>
|
150
|
+
|
151
|
+
<script type="text/javascript">
|
152
|
+
var uservoiceOptions = {
|
153
|
+
key: 'pagerankalert',
|
154
|
+
host: 'pagerankalert.uservoice.com',
|
155
|
+
forum: '6999',
|
156
|
+
showTab: true,
|
157
|
+
|
158
|
+
alignment: 'left',
|
159
|
+
background_color:'#f00',
|
160
|
+
text_color: 'white',
|
161
|
+
hover_color: '#06C',
|
162
|
+
lang: 'en'
|
163
|
+
};
|
164
|
+
|
165
|
+
function _loadUserVoice() {
|
166
|
+
var s = document.createElement('script');
|
167
|
+
s.setAttribute('type', 'text/javascript');
|
168
|
+
s.setAttribute('src', ("https:" == document.location.protocol ? "https://" : "http://") + "cdn.uservoice.com/javascripts/widgets/tab.js");
|
169
|
+
document.getElementsByTagName('head')[0].appendChild(s);
|
170
|
+
}
|
171
|
+
_loadSuper = window.onload;
|
172
|
+
window.onload = (typeof window.onload != 'function') ? _loadUserVoice : function() { _loadSuper(); _loadUserVoice(); };
|
173
|
+
</script>
|
174
|
+
<script type="text/javascript">
|
175
|
+
|
176
|
+
var _gaq = _gaq || [];
|
177
|
+
_gaq.push(['_setAccount', 'UA-122379-8']);
|
178
|
+
_gaq.push(['_trackPageview']);
|
179
|
+
|
180
|
+
(function() {
|
181
|
+
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
182
|
+
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
183
|
+
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
184
|
+
})();
|
185
|
+
|
186
|
+
</script>
|
187
|
+
</body>
|
188
|
+
</html>
|
data/spec/parser_spec.rb
CHANGED
@@ -89,11 +89,26 @@ describe MetaInspector::Parser do
|
|
89
89
|
end
|
90
90
|
|
91
91
|
describe '#favicon' do
|
92
|
-
it "should get favicon link" do
|
92
|
+
it "should get favicon link when marked as icon" do
|
93
93
|
@m = MetaInspector::Parser.new(doc 'http://pagerankalert.com/')
|
94
94
|
@m.favicon.should == 'http://pagerankalert.com/src/favicon.ico'
|
95
95
|
end
|
96
96
|
|
97
|
+
it "should get favicon link when marked as shortcut" do
|
98
|
+
@m = MetaInspector::Parser.new(doc 'http://pagerankalert-shortcut.com/')
|
99
|
+
@m.favicon.should == 'http://pagerankalert-shortcut.com/src/favicon.ico'
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should get favicon link when marked as shorcut and icon" do
|
103
|
+
@m = MetaInspector::Parser.new(doc 'http://pagerankalert-shortcut-and-icon.com/')
|
104
|
+
@m.favicon.should == 'http://pagerankalert-shortcut-and-icon.com/src/favicon.ico'
|
105
|
+
end
|
106
|
+
|
107
|
+
it "should get favicon link when there is also a touch icon" do
|
108
|
+
@m = MetaInspector::Parser.new(doc 'http://pagerankalert-touch-icon.com/')
|
109
|
+
@m.favicon.should == 'http://pagerankalert-touch-icon.com/src/favicon.ico'
|
110
|
+
end
|
111
|
+
|
97
112
|
it "should get favicon link of nil" do
|
98
113
|
@m = MetaInspector::Parser.new(doc 'http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
|
99
114
|
@m.favicon.should == nil
|
data/spec/request_spec.rb
CHANGED
@@ -44,6 +44,7 @@ describe MetaInspector::Request do
|
|
44
44
|
end
|
45
45
|
|
46
46
|
it "should handle socket errors" do
|
47
|
+
TCPSocket.stub(:open).and_raise(SocketError)
|
47
48
|
logger.should receive(:<<).with(an_instance_of(SocketError))
|
48
49
|
|
49
50
|
MetaInspector::Request.new(url('http://caca232dsdsaer3sdsd-asd343.org'), exception_log: logger)
|
data/spec/spec_helper.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
$: << File.join(File.dirname(__FILE__), "/../lib")
|
4
4
|
require 'meta_inspector'
|
5
5
|
require 'fakeweb'
|
6
|
+
require "pry"
|
6
7
|
|
7
8
|
FakeWeb.allow_net_connect = false
|
8
9
|
|
@@ -12,12 +13,21 @@ def fixture_file(filename)
|
|
12
13
|
File.read(file_path)
|
13
14
|
end
|
14
15
|
|
16
|
+
RSpec.configure do |config|
|
17
|
+
config.filter_run focus: true
|
18
|
+
config.run_all_when_everything_filtered = true
|
19
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true #rspec 3 default
|
20
|
+
end
|
21
|
+
|
15
22
|
#######################
|
16
23
|
# Faked web responses #
|
17
24
|
#######################
|
18
25
|
|
19
26
|
FakeWeb.register_uri(:get, "http://example.com/", :response => fixture_file("empty_page.response"))
|
20
27
|
FakeWeb.register_uri(:get, "http://pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
28
|
+
FakeWeb.register_uri(:get, "http://pagerankalert-shortcut.com", :response => fixture_file("pagerankalert-shortcut.com.response"))
|
29
|
+
FakeWeb.register_uri(:get, "http://pagerankalert-shortcut-and-icon.com", :response => fixture_file("pagerankalert-shortcut-and-icon.com.response"))
|
30
|
+
FakeWeb.register_uri(:get, "http://pagerankalert-touch-icon.com", :response => fixture_file("pagerankalert-touch-icon.com.response"))
|
21
31
|
FakeWeb.register_uri(:get, "pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
22
32
|
FakeWeb.register_uri(:get, "http://www.alazan.com", :response => fixture_file("alazan.com.response"))
|
23
33
|
FakeWeb.register_uri(:get, "http://alazan.com/websolution.asp", :response => fixture_file("alazan_websolution.response"))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.0
|
4
|
+
version: 2.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 10.1.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: pry
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
111
125
|
description: MetaInspector lets you scrape a web page and get its title, charset,
|
112
126
|
link and meta tags
|
113
127
|
email:
|
@@ -153,6 +167,9 @@ files:
|
|
153
167
|
- spec/fixtures/markupvalidator_faqs.response
|
154
168
|
- spec/fixtures/meta_tags.response
|
155
169
|
- spec/fixtures/nonhttp.response
|
170
|
+
- spec/fixtures/pagerankalert-shortcut-and-icon.com.response
|
171
|
+
- spec/fixtures/pagerankalert-shortcut.com.response
|
172
|
+
- spec/fixtures/pagerankalert-touch-icon.com.response
|
156
173
|
- spec/fixtures/pagerankalert.com.response
|
157
174
|
- spec/fixtures/protocol_relative.response
|
158
175
|
- spec/fixtures/relative_links.response
|