metainspector 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/meta_inspector/document.rb +2 -2
- data/lib/meta_inspector/parser.rb +7 -4
- data/lib/meta_inspector/request.rb +3 -4
- data/lib/meta_inspector/url.rb +3 -4
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +1 -0
- data/spec/fixtures/pagerankalert-shortcut-and-icon.com.response +187 -0
- data/spec/fixtures/pagerankalert-shortcut.com.response +187 -0
- data/spec/fixtures/pagerankalert-touch-icon.com.response +188 -0
- data/spec/parser_spec.rb +16 -1
- data/spec/request_spec.rb +1 -0
- data/spec/spec_helper.rb +10 -0
- metadata +19 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f703091a8a43619fd8f6ecfdb60de18a6325a41c
|
4
|
+
data.tar.gz: 629dff9035a4c8c5c4aa6b27d73a3e689a0873a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 361be3755dbaab2ac880d8f5414c613ae1d3fde35899c6256710c8e94733105da4c13a007cfe002760775ef9b39961dc571c37470ecdba0ade89690aeac33de3
|
7
|
+
data.tar.gz: 79eb048b137b55088b2e9ac587dfcd31300fa121073f79fcd8a5598e1384670f54fcc84cf4951f9d111f7e468219ec6fa065917146ebe622f42826ffadd7562f
|
data/lib/meta_inspector/parser.rb
CHANGED
@@ -37,9 +37,8 @@ module MetaInspector
|
|
37
37
|
# Returns the whole parsed document
|
38
38
|
def parsed
|
39
39
|
@parsed ||= Nokogiri::HTML(@document.to_s)
|
40
|
-
|
41
|
-
|
42
|
-
@exception_log << e
|
40
|
+
rescue Exception => e
|
41
|
+
@exception_log << e
|
43
42
|
end
|
44
43
|
|
45
44
|
# Returns the parsed document title, from the content of the <title> tag.
|
@@ -50,7 +49,11 @@ module MetaInspector
|
|
50
49
|
|
51
50
|
# Return favicon url if exist
|
52
51
|
def favicon
|
53
|
-
|
52
|
+
query = '//link[@rel="icon" or contains(@rel, "shortcut")]'
|
53
|
+
value = parsed.xpath(query)[0].attributes['href'].value
|
54
|
+
@favicon ||= URL.absolutify(value, base_url)
|
55
|
+
rescue
|
56
|
+
nil
|
54
57
|
end
|
55
58
|
|
56
59
|
# A description getter that first checks for a meta description and if not present will
|
data/lib/meta_inspector/request.rb
CHANGED
@@ -37,10 +37,9 @@ module MetaInspector
|
|
37
37
|
|
38
38
|
def response
|
39
39
|
Timeout::timeout(@timeout) { @response ||= fetch }
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
nil
|
40
|
+
rescue TimeoutError, SocketError, RuntimeError => e
|
41
|
+
@exception_log << e
|
42
|
+
nil
|
44
43
|
end
|
45
44
|
|
46
45
|
def fetch
|
data/lib/meta_inspector/url.rb
CHANGED
@@ -66,10 +66,9 @@ module MetaInspector
|
|
66
66
|
|
67
67
|
def parsed(url)
|
68
68
|
Addressable::URI.parse(url)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
nil
|
69
|
+
rescue Addressable::URI::InvalidURIError => e
|
70
|
+
@exception_log << e
|
71
|
+
nil
|
73
72
|
end
|
74
73
|
end
|
75
74
|
end
|
data/meta_inspector.gemspec
CHANGED
data/spec/fixtures/pagerankalert-shortcut-and-icon.com.response
ADDED
@@ -0,0 +1,187 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Mon, 30 May 2011 09:45:42 GMT
|
4
|
+
Content-Type: text/html; charset=utf-8
|
5
|
+
Connection: keep-alive
|
6
|
+
ETag: "d0534cf7ad7d7a7fb737fe4ad99b0fd1"
|
7
|
+
X-UA-Compatible: IE=Edge,chrome=1
|
8
|
+
X-Runtime: 0.031274
|
9
|
+
Set-Cookie: _session_id=33575f7694b4492af4c4e282d62a7127; path=/; HttpOnly
|
10
|
+
Cache-Control: max-age=0, private, must-revalidate
|
11
|
+
Content-Length: 6690
|
12
|
+
X-Varnish: 2167295052
|
13
|
+
Age: 0
|
14
|
+
Via: 1.1 varnish
|
15
|
+
|
16
|
+
<!DOCTYPE html>
|
17
|
+
<html lang="en">
|
18
|
+
<head>
|
19
|
+
<meta charset=utf-8>
|
20
|
+
<link rel="alternate" type="application/rss+xml" title="PageRankAlert.com blog" href="http://feeds.feedburner.com/PageRankAlert" />
|
21
|
+
<title>PageRankAlert.com :: Track your PageRank changes & receive alerts</title>
|
22
|
+
<link rel="shortcut icon" href="/src/favicon.ico">
|
23
|
+
<meta name="description" content="Track your PageRank(TM) changes and receive alerts by email" />
|
24
|
+
<meta name="keywords" content="pagerank, seo, optimization, google" />
|
25
|
+
<meta name="robots" content="all,follow" />
|
26
|
+
<link rel="stylesheet" href="/stylesheets/screen.css">
|
27
|
+
<link href='http://fonts.googleapis.com/css?family=Yanone+Kaffeesatz&subset=latin' rel='stylesheet' type='text/css'>
|
28
|
+
|
29
|
+
<script src="/javascripts/jquery.min.js?1305794559" type="text/javascript"></script>
|
30
|
+
<script src="/javascripts/rails.js?1305794559" type="text/javascript"></script>
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
<meta name="csrf-param" content="authenticity_token"/>
|
35
|
+
<meta name="csrf-token" content="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="/>
|
36
|
+
</head>
|
37
|
+
|
38
|
+
<body>
|
39
|
+
<script type="text/javascript">
|
40
|
+
(function(){
|
41
|
+
var bsa = document.createElement('script');
|
42
|
+
bsa.type = 'text/javascript';
|
43
|
+
bsa.async = true;
|
44
|
+
bsa.src = '//s3.buysellads.com/ac/bsa.js';
|
45
|
+
(document.getElementsByTagName('head')[0]||document.getElementsByTagName('body')[0]).appendChild(bsa);
|
46
|
+
})();
|
47
|
+
</script> <div id="flash_notice" class="flashmessage" style='display:none;'></div>
|
48
|
+
|
49
|
+
<div id="flash_error" class="flashmessage" style='display:none;'></div>
|
50
|
+
|
51
|
+
<div id="flash_alert" class="flashmessage" style='display:none;'></div>
|
52
|
+
|
53
|
+
<div id="banner_top" style="background-color:#111; color:#fff;text-align:center;font-size:1.4em;font-weight:bold;padding:0.6em;">
|
54
|
+
<div style="width:980px; margin:0 auto;">
|
55
|
+
<div id="bsap_1260794" style="margin-left: 125px;" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
56
|
+
</div>
|
57
|
+
</div>
|
58
|
+
|
59
|
+
<div id="page">
|
60
|
+
<div id="header">
|
61
|
+
<div class="section">
|
62
|
+
<h1>
|
63
|
+
<a href="/" id="logo" tabindex="1"><img alt="PageRankAlert" src="/images/pagerank_alert.png?1305794559" /></a>
|
64
|
+
</h1>
|
65
|
+
</div>
|
66
|
+
|
67
|
+
<div class="section language">
|
68
|
+
<span id='eng'>eng</span> |
|
69
|
+
<a href="/es?language=es"><span id='esp'>esp</span></a>
|
70
|
+
</div>
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
<div class="section login">
|
77
|
+
<a href="/users/sign_up">Register</a> or <a href="/users/sign_in">Sign in</a>
|
78
|
+
</div>
|
79
|
+
|
80
|
+
<div class="aside">
|
81
|
+
<ul class="nav">
|
82
|
+
<li><a href="/" class="selectedss">home</a></li>
|
83
|
+
<li><a href="mailto:pagerankalert@gmail.com">contact</a></li>
|
84
|
+
<li><a href="http://pagerankalert.posterous.com" target="_blank">blog</a></li>
|
85
|
+
<li><a href="http://twitter.com/pagerankalert" target="_blank">twitter</a></li>
|
86
|
+
<li style="margin-top:10px; margin-left:40px;"><a href="http://twitter.com/share" style="display:block;" class="twitter-share-button" data-count="horizontal" data-via="pagerankalert" data-lang="en">Tweet</a><script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script></li>
|
87
|
+
</ul>
|
88
|
+
</div>
|
89
|
+
</div>
|
90
|
+
|
91
|
+
<div id="main">
|
92
|
+
<div id="content1">
|
93
|
+
<h2 class="begin">
|
94
|
+
Track your PageRank changes
|
95
|
+
</h2>
|
96
|
+
<h3 class="begin">WHAT'S YOUR PAGERANK?</h3>
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
<div id="banner_up" class="tac" style="margin-top:2em; margin-bottom:-2em;">
|
101
|
+
<div id="bsap_1258008" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1" style="width: 728px; padding-left:115px;"></div>
|
102
|
+
</div>
|
103
|
+
|
104
|
+
<form accept-charset="UTF-8" action="/pages" class="search_form general_form" id="pagerank" method="post"><div style="margin:0;padding:0;display:inline"><input name="utf8" type="hidden" value="✓" /><input name="authenticity_token" type="hidden" value="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE=" /></div>
|
105
|
+
<label for="myurl">Search your page now:</label>
|
106
|
+
<input class="text" id="page_url" maxlength="250" name="page[url]" size="60" type="text" value="http://" />
|
107
|
+
|
108
|
+
<p class="explanation">
|
109
|
+
PageRank is different for the same URL with or without <em>www</em>
|
110
|
+
</p>
|
111
|
+
<p class="tac">
|
112
|
+
<button type="submit">Check PageRank »</button>
|
113
|
+
</p>
|
114
|
+
</form>
|
115
|
+
<div id="features">
|
116
|
+
<div id="a" class="section">
|
117
|
+
<h4><strong>Build your own lists</strong></h4>
|
118
|
+
<p>Have you got a lot of pages that you'd like to track easily from one single place?<br /><br />You can build your own <strong>PageRank watchlist</strong> with an unlimited number of URLs.</p>
|
119
|
+
</div>
|
120
|
+
|
121
|
+
<div id="b" class="section">
|
122
|
+
<h4><strong>Get e-mail alerts</strong></h4>
|
123
|
+
<p>Do you want to be notified by email when your <strong>PageRank</strong> changes?<br /><br />You'll get an <strong>email</strong> when we detect a change in any of your tracked sites.</p>
|
124
|
+
</div>
|
125
|
+
|
126
|
+
<div id="c" class="section last">
|
127
|
+
<h4><strong>Track your history</strong></h4>
|
128
|
+
<p>Do you want to know the details of your <strong>PageRank evolution</strong>?<br /><br />We will show you a graphic with all the registered PageRank changes for each page.</p>
|
129
|
+
</div>
|
130
|
+
</div>
|
131
|
+
|
132
|
+
<div id="banner_down" class="tac">
|
133
|
+
<div id="bsap_1256852" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
134
|
+
</div>
|
135
|
+
|
136
|
+
</div>
|
137
|
+
</div>
|
138
|
+
|
139
|
+
<div id="footer">
|
140
|
+
<div id="credits" class="section">
|
141
|
+
<small>
|
142
|
+
<p>
|
143
|
+
© 2006-2011 PageRankAlert.com. This site is not associated to Google. PageRank is a registered trademark of Google Inc.
|
144
|
+
</p>
|
145
|
+
</small>
|
146
|
+
</div>
|
147
|
+
</div>
|
148
|
+
</div>
|
149
|
+
|
150
|
+
<script type="text/javascript">
|
151
|
+
var uservoiceOptions = {
|
152
|
+
key: 'pagerankalert',
|
153
|
+
host: 'pagerankalert.uservoice.com',
|
154
|
+
forum: '6999',
|
155
|
+
showTab: true,
|
156
|
+
|
157
|
+
alignment: 'left',
|
158
|
+
background_color:'#f00',
|
159
|
+
text_color: 'white',
|
160
|
+
hover_color: '#06C',
|
161
|
+
lang: 'en'
|
162
|
+
};
|
163
|
+
|
164
|
+
function _loadUserVoice() {
|
165
|
+
var s = document.createElement('script');
|
166
|
+
s.setAttribute('type', 'text/javascript');
|
167
|
+
s.setAttribute('src', ("https:" == document.location.protocol ? "https://" : "http://") + "cdn.uservoice.com/javascripts/widgets/tab.js");
|
168
|
+
document.getElementsByTagName('head')[0].appendChild(s);
|
169
|
+
}
|
170
|
+
_loadSuper = window.onload;
|
171
|
+
window.onload = (typeof window.onload != 'function') ? _loadUserVoice : function() { _loadSuper(); _loadUserVoice(); };
|
172
|
+
</script>
|
173
|
+
<script type="text/javascript">
|
174
|
+
|
175
|
+
var _gaq = _gaq || [];
|
176
|
+
_gaq.push(['_setAccount', 'UA-122379-8']);
|
177
|
+
_gaq.push(['_trackPageview']);
|
178
|
+
|
179
|
+
(function() {
|
180
|
+
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
181
|
+
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
182
|
+
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
183
|
+
})();
|
184
|
+
|
185
|
+
</script>
|
186
|
+
</body>
|
187
|
+
</html>
|
data/spec/fixtures/pagerankalert-shortcut.com.response
ADDED
@@ -0,0 +1,187 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Mon, 30 May 2011 09:45:42 GMT
|
4
|
+
Content-Type: text/html; charset=utf-8
|
5
|
+
Connection: keep-alive
|
6
|
+
ETag: "d0534cf7ad7d7a7fb737fe4ad99b0fd1"
|
7
|
+
X-UA-Compatible: IE=Edge,chrome=1
|
8
|
+
X-Runtime: 0.031274
|
9
|
+
Set-Cookie: _session_id=33575f7694b4492af4c4e282d62a7127; path=/; HttpOnly
|
10
|
+
Cache-Control: max-age=0, private, must-revalidate
|
11
|
+
Content-Length: 6690
|
12
|
+
X-Varnish: 2167295052
|
13
|
+
Age: 0
|
14
|
+
Via: 1.1 varnish
|
15
|
+
|
16
|
+
<!DOCTYPE html>
|
17
|
+
<html lang="en">
|
18
|
+
<head>
|
19
|
+
<meta charset=utf-8>
|
20
|
+
<link rel="alternate" type="application/rss+xml" title="PageRankAlert.com blog" href="http://feeds.feedburner.com/PageRankAlert" />
|
21
|
+
<title>PageRankAlert.com :: Track your PageRank changes & receive alerts</title>
|
22
|
+
<link rel="shortcut" href="/src/favicon.ico">
|
23
|
+
<meta name="description" content="Track your PageRank(TM) changes and receive alerts by email" />
|
24
|
+
<meta name="keywords" content="pagerank, seo, optimization, google" />
|
25
|
+
<meta name="robots" content="all,follow" />
|
26
|
+
<link rel="stylesheet" href="/stylesheets/screen.css">
|
27
|
+
<link href='http://fonts.googleapis.com/css?family=Yanone+Kaffeesatz&subset=latin' rel='stylesheet' type='text/css'>
|
28
|
+
|
29
|
+
<script src="/javascripts/jquery.min.js?1305794559" type="text/javascript"></script>
|
30
|
+
<script src="/javascripts/rails.js?1305794559" type="text/javascript"></script>
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
<meta name="csrf-param" content="authenticity_token"/>
|
35
|
+
<meta name="csrf-token" content="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="/>
|
36
|
+
</head>
|
37
|
+
|
38
|
+
<body>
|
39
|
+
<script type="text/javascript">
|
40
|
+
(function(){
|
41
|
+
var bsa = document.createElement('script');
|
42
|
+
bsa.type = 'text/javascript';
|
43
|
+
bsa.async = true;
|
44
|
+
bsa.src = '//s3.buysellads.com/ac/bsa.js';
|
45
|
+
(document.getElementsByTagName('head')[0]||document.getElementsByTagName('body')[0]).appendChild(bsa);
|
46
|
+
})();
|
47
|
+
</script> <div id="flash_notice" class="flashmessage" style='display:none;'></div>
|
48
|
+
|
49
|
+
<div id="flash_error" class="flashmessage" style='display:none;'></div>
|
50
|
+
|
51
|
+
<div id="flash_alert" class="flashmessage" style='display:none;'></div>
|
52
|
+
|
53
|
+
<div id="banner_top" style="background-color:#111; color:#fff;text-align:center;font-size:1.4em;font-weight:bold;padding:0.6em;">
|
54
|
+
<div style="width:980px; margin:0 auto;">
|
55
|
+
<div id="bsap_1260794" style="margin-left: 125px;" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
56
|
+
</div>
|
57
|
+
</div>
|
58
|
+
|
59
|
+
<div id="page">
|
60
|
+
<div id="header">
|
61
|
+
<div class="section">
|
62
|
+
<h1>
|
63
|
+
<a href="/" id="logo" tabindex="1"><img alt="PageRankAlert" src="/images/pagerank_alert.png?1305794559" /></a>
|
64
|
+
</h1>
|
65
|
+
</div>
|
66
|
+
|
67
|
+
<div class="section language">
|
68
|
+
<span id='eng'>eng</span> |
|
69
|
+
<a href="/es?language=es"><span id='esp'>esp</span></a>
|
70
|
+
</div>
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
<div class="section login">
|
77
|
+
<a href="/users/sign_up">Register</a> or <a href="/users/sign_in">Sign in</a>
|
78
|
+
</div>
|
79
|
+
|
80
|
+
<div class="aside">
|
81
|
+
<ul class="nav">
|
82
|
+
<li><a href="/" class="selectedss">home</a></li>
|
83
|
+
<li><a href="mailto:pagerankalert@gmail.com">contact</a></li>
|
84
|
+
<li><a href="http://pagerankalert.posterous.com" target="_blank">blog</a></li>
|
85
|
+
<li><a href="http://twitter.com/pagerankalert" target="_blank">twitter</a></li>
|
86
|
+
<li style="margin-top:10px; margin-left:40px;"><a href="http://twitter.com/share" style="display:block;" class="twitter-share-button" data-count="horizontal" data-via="pagerankalert" data-lang="en">Tweet</a><script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script></li>
|
87
|
+
</ul>
|
88
|
+
</div>
|
89
|
+
</div>
|
90
|
+
|
91
|
+
<div id="main">
|
92
|
+
<div id="content1">
|
93
|
+
<h2 class="begin">
|
94
|
+
Track your PageRank changes
|
95
|
+
</h2>
|
96
|
+
<h3 class="begin">WHAT'S YOUR PAGERANK?</h3>
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
<div id="banner_up" class="tac" style="margin-top:2em; margin-bottom:-2em;">
|
101
|
+
<div id="bsap_1258008" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1" style="width: 728px; padding-left:115px;"></div>
|
102
|
+
</div>
|
103
|
+
|
104
|
+
<form accept-charset="UTF-8" action="/pages" class="search_form general_form" id="pagerank" method="post"><div style="margin:0;padding:0;display:inline"><input name="utf8" type="hidden" value="✓" /><input name="authenticity_token" type="hidden" value="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE=" /></div>
|
105
|
+
<label for="myurl">Search your page now:</label>
|
106
|
+
<input class="text" id="page_url" maxlength="250" name="page[url]" size="60" type="text" value="http://" />
|
107
|
+
|
108
|
+
<p class="explanation">
|
109
|
+
PageRank is different for the same URL with or without <em>www</em>
|
110
|
+
</p>
|
111
|
+
<p class="tac">
|
112
|
+
<button type="submit">Check PageRank »</button>
|
113
|
+
</p>
|
114
|
+
</form>
|
115
|
+
<div id="features">
|
116
|
+
<div id="a" class="section">
|
117
|
+
<h4><strong>Build your own lists</strong></h4>
|
118
|
+
<p>Have you got a lot of pages that you'd like to track easily from one single place?<br /><br />You can build your own <strong>PageRank watchlist</strong> with an unlimited number of URLs.</p>
|
119
|
+
</div>
|
120
|
+
|
121
|
+
<div id="b" class="section">
|
122
|
+
<h4><strong>Get e-mail alerts</strong></h4>
|
123
|
+
<p>Do you want to be notified by email when your <strong>PageRank</strong> changes?<br /><br />You'll get an <strong>email</strong> when we detect a change in any of your tracked sites.</p>
|
124
|
+
</div>
|
125
|
+
|
126
|
+
<div id="c" class="section last">
|
127
|
+
<h4><strong>Track your history</strong></h4>
|
128
|
+
<p>Do you want to know the details of your <strong>PageRank evolution</strong>?<br /><br />We will show you a graphic with all the registered PageRank changes for each page.</p>
|
129
|
+
</div>
|
130
|
+
</div>
|
131
|
+
|
132
|
+
<div id="banner_down" class="tac">
|
133
|
+
<div id="bsap_1256852" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
134
|
+
</div>
|
135
|
+
|
136
|
+
</div>
|
137
|
+
</div>
|
138
|
+
|
139
|
+
<div id="footer">
|
140
|
+
<div id="credits" class="section">
|
141
|
+
<small>
|
142
|
+
<p>
|
143
|
+
© 2006-2011 PageRankAlert.com. This site is not associated to Google. PageRank is a registered trademark of Google Inc.
|
144
|
+
</p>
|
145
|
+
</small>
|
146
|
+
</div>
|
147
|
+
</div>
|
148
|
+
</div>
|
149
|
+
|
150
|
+
<script type="text/javascript">
|
151
|
+
var uservoiceOptions = {
|
152
|
+
key: 'pagerankalert',
|
153
|
+
host: 'pagerankalert.uservoice.com',
|
154
|
+
forum: '6999',
|
155
|
+
showTab: true,
|
156
|
+
|
157
|
+
alignment: 'left',
|
158
|
+
background_color:'#f00',
|
159
|
+
text_color: 'white',
|
160
|
+
hover_color: '#06C',
|
161
|
+
lang: 'en'
|
162
|
+
};
|
163
|
+
|
164
|
+
function _loadUserVoice() {
|
165
|
+
var s = document.createElement('script');
|
166
|
+
s.setAttribute('type', 'text/javascript');
|
167
|
+
s.setAttribute('src', ("https:" == document.location.protocol ? "https://" : "http://") + "cdn.uservoice.com/javascripts/widgets/tab.js");
|
168
|
+
document.getElementsByTagName('head')[0].appendChild(s);
|
169
|
+
}
|
170
|
+
_loadSuper = window.onload;
|
171
|
+
window.onload = (typeof window.onload != 'function') ? _loadUserVoice : function() { _loadSuper(); _loadUserVoice(); };
|
172
|
+
</script>
|
173
|
+
<script type="text/javascript">
|
174
|
+
|
175
|
+
var _gaq = _gaq || [];
|
176
|
+
_gaq.push(['_setAccount', 'UA-122379-8']);
|
177
|
+
_gaq.push(['_trackPageview']);
|
178
|
+
|
179
|
+
(function() {
|
180
|
+
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
181
|
+
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
182
|
+
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
183
|
+
})();
|
184
|
+
|
185
|
+
</script>
|
186
|
+
</body>
|
187
|
+
</html>
|
data/spec/fixtures/pagerankalert-touch-icon.com.response
ADDED
@@ -0,0 +1,188 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Mon, 30 May 2011 09:45:42 GMT
|
4
|
+
Content-Type: text/html; charset=utf-8
|
5
|
+
Connection: keep-alive
|
6
|
+
ETag: "d0534cf7ad7d7a7fb737fe4ad99b0fd1"
|
7
|
+
X-UA-Compatible: IE=Edge,chrome=1
|
8
|
+
X-Runtime: 0.031274
|
9
|
+
Set-Cookie: _session_id=33575f7694b4492af4c4e282d62a7127; path=/; HttpOnly
|
10
|
+
Cache-Control: max-age=0, private, must-revalidate
|
11
|
+
Content-Length: 6690
|
12
|
+
X-Varnish: 2167295052
|
13
|
+
Age: 0
|
14
|
+
Via: 1.1 varnish
|
15
|
+
|
16
|
+
<!DOCTYPE html>
|
17
|
+
<html lang="en">
|
18
|
+
<head>
|
19
|
+
<meta charset=utf-8>
|
20
|
+
<link rel="alternate" type="application/rss+xml" title="PageRankAlert.com blog" href="http://feeds.feedburner.com/PageRankAlert" />
|
21
|
+
<title>PageRankAlert.com :: Track your PageRank changes & receive alerts</title>
|
22
|
+
<link href="img/apple-touch-icon-58x58.png" rel="apple-touch-icon" sizes="58x58" />
|
23
|
+
<link rel="icon" href="/src/favicon.ico">
|
24
|
+
<meta name="description" content="Track your PageRank(TM) changes and receive alerts by email" />
|
25
|
+
<meta name="keywords" content="pagerank, seo, optimization, google" />
|
26
|
+
<meta name="robots" content="all,follow" />
|
27
|
+
<link rel="stylesheet" href="/stylesheets/screen.css">
|
28
|
+
<link href='http://fonts.googleapis.com/css?family=Yanone+Kaffeesatz&subset=latin' rel='stylesheet' type='text/css'>
|
29
|
+
|
30
|
+
<script src="/javascripts/jquery.min.js?1305794559" type="text/javascript"></script>
|
31
|
+
<script src="/javascripts/rails.js?1305794559" type="text/javascript"></script>
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
<meta name="csrf-param" content="authenticity_token"/>
|
36
|
+
<meta name="csrf-token" content="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="/>
|
37
|
+
</head>
|
38
|
+
|
39
|
+
<body>
|
40
|
+
<script type="text/javascript">
|
41
|
+
(function(){
|
42
|
+
var bsa = document.createElement('script');
|
43
|
+
bsa.type = 'text/javascript';
|
44
|
+
bsa.async = true;
|
45
|
+
bsa.src = '//s3.buysellads.com/ac/bsa.js';
|
46
|
+
(document.getElementsByTagName('head')[0]||document.getElementsByTagName('body')[0]).appendChild(bsa);
|
47
|
+
})();
|
48
|
+
</script> <div id="flash_notice" class="flashmessage" style='display:none;'></div>
|
49
|
+
|
50
|
+
<div id="flash_error" class="flashmessage" style='display:none;'></div>
|
51
|
+
|
52
|
+
<div id="flash_alert" class="flashmessage" style='display:none;'></div>
|
53
|
+
|
54
|
+
<div id="banner_top" style="background-color:#111; color:#fff;text-align:center;font-size:1.4em;font-weight:bold;padding:0.6em;">
|
55
|
+
<div style="width:980px; margin:0 auto;">
|
56
|
+
<div id="bsap_1260794" style="margin-left: 125px;" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
57
|
+
</div>
|
58
|
+
</div>
|
59
|
+
|
60
|
+
<div id="page">
|
61
|
+
<div id="header">
|
62
|
+
<div class="section">
|
63
|
+
<h1>
|
64
|
+
<a href="/" id="logo" tabindex="1"><img alt="PageRankAlert" src="/images/pagerank_alert.png?1305794559" /></a>
|
65
|
+
</h1>
|
66
|
+
</div>
|
67
|
+
|
68
|
+
<div class="section language">
|
69
|
+
<span id='eng'>eng</span> |
|
70
|
+
<a href="/es?language=es"><span id='esp'>esp</span></a>
|
71
|
+
</div>
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
<div class="section login">
|
78
|
+
<a href="/users/sign_up">Register</a> or <a href="/users/sign_in">Sign in</a>
|
79
|
+
</div>
|
80
|
+
|
81
|
+
<div class="aside">
|
82
|
+
<ul class="nav">
|
83
|
+
<li><a href="/" class="selectedss">home</a></li>
|
84
|
+
<li><a href="mailto:pagerankalert@gmail.com">contact</a></li>
|
85
|
+
<li><a href="http://pagerankalert.posterous.com" target="_blank">blog</a></li>
|
86
|
+
<li><a href="http://twitter.com/pagerankalert" target="_blank">twitter</a></li>
|
87
|
+
<li style="margin-top:10px; margin-left:40px;"><a href="http://twitter.com/share" style="display:block;" class="twitter-share-button" data-count="horizontal" data-via="pagerankalert" data-lang="en">Tweet</a><script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script></li>
|
88
|
+
</ul>
|
89
|
+
</div>
|
90
|
+
</div>
|
91
|
+
|
92
|
+
<div id="main">
|
93
|
+
<div id="content1">
|
94
|
+
<h2 class="begin">
|
95
|
+
Track your PageRank changes
|
96
|
+
</h2>
|
97
|
+
<h3 class="begin">WHAT'S YOUR PAGERANK?</h3>
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
<div id="banner_up" class="tac" style="margin-top:2em; margin-bottom:-2em;">
|
102
|
+
<div id="bsap_1258008" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1" style="width: 728px; padding-left:115px;"></div>
|
103
|
+
</div>
|
104
|
+
|
105
|
+
<form accept-charset="UTF-8" action="/pages" class="search_form general_form" id="pagerank" method="post"><div style="margin:0;padding:0;display:inline"><input name="utf8" type="hidden" value="✓" /><input name="authenticity_token" type="hidden" value="iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE=" /></div>
|
106
|
+
<label for="myurl">Search your page now:</label>
|
107
|
+
<input class="text" id="page_url" maxlength="250" name="page[url]" size="60" type="text" value="http://" />
|
108
|
+
|
109
|
+
<p class="explanation">
|
110
|
+
PageRank is different for the same URL with or without <em>www</em>
|
111
|
+
</p>
|
112
|
+
<p class="tac">
|
113
|
+
<button type="submit">Check PageRank »</button>
|
114
|
+
</p>
|
115
|
+
</form>
|
116
|
+
<div id="features">
|
117
|
+
<div id="a" class="section">
|
118
|
+
<h4><strong>Build your own lists</strong></h4>
|
119
|
+
<p>Have you got a lot of pages that you'd like to track easily from one single place?<br /><br />You can build your own <strong>PageRank watchlist</strong> with an unlimited number of URLs.</p>
|
120
|
+
</div>
|
121
|
+
|
122
|
+
<div id="b" class="section">
|
123
|
+
<h4><strong>Get e-mail alerts</strong></h4>
|
124
|
+
<p>Do you want to be notified by email when your <strong>PageRank</strong> changes?<br /><br />You'll get an <strong>email</strong> when we detect a change in any of your tracked sites.</p>
|
125
|
+
</div>
|
126
|
+
|
127
|
+
<div id="c" class="section last">
|
128
|
+
<h4><strong>Track your history</strong></h4>
|
129
|
+
<p>Do you want to know the details of your <strong>PageRank evolution</strong>?<br /><br />We will show you a graphic with all the registered PageRank changes for each page.</p>
|
130
|
+
</div>
|
131
|
+
</div>
|
132
|
+
|
133
|
+
<div id="banner_down" class="tac">
|
134
|
+
<div id="bsap_1256852" class="bsarocks bsap_a85b1f9acae25d5eef510375a20768f1"></div>
|
135
|
+
</div>
|
136
|
+
|
137
|
+
</div>
|
138
|
+
</div>
|
139
|
+
|
140
|
+
<div id="footer">
|
141
|
+
<div id="credits" class="section">
|
142
|
+
<small>
|
143
|
+
<p>
|
144
|
+
© 2006-2011 PageRankAlert.com. This site is not associated to Google. PageRank is a registered trademark of Google Inc.
|
145
|
+
</p>
|
146
|
+
</small>
|
147
|
+
</div>
|
148
|
+
</div>
|
149
|
+
</div>
|
150
|
+
|
151
|
+
<script type="text/javascript">
|
152
|
+
var uservoiceOptions = {
|
153
|
+
key: 'pagerankalert',
|
154
|
+
host: 'pagerankalert.uservoice.com',
|
155
|
+
forum: '6999',
|
156
|
+
showTab: true,
|
157
|
+
|
158
|
+
alignment: 'left',
|
159
|
+
background_color:'#f00',
|
160
|
+
text_color: 'white',
|
161
|
+
hover_color: '#06C',
|
162
|
+
lang: 'en'
|
163
|
+
};
|
164
|
+
|
165
|
+
function _loadUserVoice() {
|
166
|
+
var s = document.createElement('script');
|
167
|
+
s.setAttribute('type', 'text/javascript');
|
168
|
+
s.setAttribute('src', ("https:" == document.location.protocol ? "https://" : "http://") + "cdn.uservoice.com/javascripts/widgets/tab.js");
|
169
|
+
document.getElementsByTagName('head')[0].appendChild(s);
|
170
|
+
}
|
171
|
+
_loadSuper = window.onload;
|
172
|
+
window.onload = (typeof window.onload != 'function') ? _loadUserVoice : function() { _loadSuper(); _loadUserVoice(); };
|
173
|
+
</script>
|
174
|
+
<script type="text/javascript">
|
175
|
+
|
176
|
+
var _gaq = _gaq || [];
|
177
|
+
_gaq.push(['_setAccount', 'UA-122379-8']);
|
178
|
+
_gaq.push(['_trackPageview']);
|
179
|
+
|
180
|
+
(function() {
|
181
|
+
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
182
|
+
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
183
|
+
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
184
|
+
})();
|
185
|
+
|
186
|
+
</script>
|
187
|
+
</body>
|
188
|
+
</html>
|
data/spec/parser_spec.rb
CHANGED
@@ -89,11 +89,26 @@ describe MetaInspector::Parser do
|
|
89
89
|
end
|
90
90
|
|
91
91
|
describe '#favicon' do
|
92
|
-
it "should get favicon link" do
|
92
|
+
it "should get favicon link when marked as icon" do
|
93
93
|
@m = MetaInspector::Parser.new(doc 'http://pagerankalert.com/')
|
94
94
|
@m.favicon.should == 'http://pagerankalert.com/src/favicon.ico'
|
95
95
|
end
|
96
96
|
|
97
|
+
it "should get favicon link when marked as shortcut" do
|
98
|
+
@m = MetaInspector::Parser.new(doc 'http://pagerankalert-shortcut.com/')
|
99
|
+
@m.favicon.should == 'http://pagerankalert-shortcut.com/src/favicon.ico'
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should get favicon link when marked as shorcut and icon" do
|
103
|
+
@m = MetaInspector::Parser.new(doc 'http://pagerankalert-shortcut-and-icon.com/')
|
104
|
+
@m.favicon.should == 'http://pagerankalert-shortcut-and-icon.com/src/favicon.ico'
|
105
|
+
end
|
106
|
+
|
107
|
+
it "should get favicon link when there is also a touch icon" do
|
108
|
+
@m = MetaInspector::Parser.new(doc 'http://pagerankalert-touch-icon.com/')
|
109
|
+
@m.favicon.should == 'http://pagerankalert-touch-icon.com/src/favicon.ico'
|
110
|
+
end
|
111
|
+
|
97
112
|
it "should get favicon link of nil" do
|
98
113
|
@m = MetaInspector::Parser.new(doc 'http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
|
99
114
|
@m.favicon.should == nil
|
data/spec/request_spec.rb
CHANGED
@@ -44,6 +44,7 @@ describe MetaInspector::Request do
|
|
44
44
|
end
|
45
45
|
|
46
46
|
it "should handle socket errors" do
|
47
|
+
TCPSocket.stub(:open).and_raise(SocketError)
|
47
48
|
logger.should receive(:<<).with(an_instance_of(SocketError))
|
48
49
|
|
49
50
|
MetaInspector::Request.new(url('http://caca232dsdsaer3sdsd-asd343.org'), exception_log: logger)
|
data/spec/spec_helper.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
$: << File.join(File.dirname(__FILE__), "/../lib")
|
4
4
|
require 'meta_inspector'
|
5
5
|
require 'fakeweb'
|
6
|
+
require "pry"
|
6
7
|
|
7
8
|
FakeWeb.allow_net_connect = false
|
8
9
|
|
@@ -12,12 +13,21 @@ def fixture_file(filename)
|
|
12
13
|
File.read(file_path)
|
13
14
|
end
|
14
15
|
|
16
|
+
RSpec.configure do |config|
|
17
|
+
config.filter_run focus: true
|
18
|
+
config.run_all_when_everything_filtered = true
|
19
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true #rspec 3 default
|
20
|
+
end
|
21
|
+
|
15
22
|
#######################
|
16
23
|
# Faked web responses #
|
17
24
|
#######################
|
18
25
|
|
19
26
|
FakeWeb.register_uri(:get, "http://example.com/", :response => fixture_file("empty_page.response"))
|
20
27
|
FakeWeb.register_uri(:get, "http://pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
28
|
+
FakeWeb.register_uri(:get, "http://pagerankalert-shortcut.com", :response => fixture_file("pagerankalert-shortcut.com.response"))
|
29
|
+
FakeWeb.register_uri(:get, "http://pagerankalert-shortcut-and-icon.com", :response => fixture_file("pagerankalert-shortcut-and-icon.com.response"))
|
30
|
+
FakeWeb.register_uri(:get, "http://pagerankalert-touch-icon.com", :response => fixture_file("pagerankalert-touch-icon.com.response"))
|
21
31
|
FakeWeb.register_uri(:get, "pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
22
32
|
FakeWeb.register_uri(:get, "http://www.alazan.com", :response => fixture_file("alazan.com.response"))
|
23
33
|
FakeWeb.register_uri(:get, "http://alazan.com/websolution.asp", :response => fixture_file("alazan_websolution.response"))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.0
|
4
|
+
version: 2.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 10.1.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: pry
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
111
125
|
description: MetaInspector lets you scrape a web page and get its title, charset,
|
112
126
|
link and meta tags
|
113
127
|
email:
|
@@ -153,6 +167,9 @@ files:
|
|
153
167
|
- spec/fixtures/markupvalidator_faqs.response
|
154
168
|
- spec/fixtures/meta_tags.response
|
155
169
|
- spec/fixtures/nonhttp.response
|
170
|
+
- spec/fixtures/pagerankalert-shortcut-and-icon.com.response
|
171
|
+
- spec/fixtures/pagerankalert-shortcut.com.response
|
172
|
+
- spec/fixtures/pagerankalert-touch-icon.com.response
|
156
173
|
- spec/fixtures/pagerankalert.com.response
|
157
174
|
- spec/fixtures/protocol_relative.response
|
158
175
|
- spec/fixtures/relative_links.response
|