metainspector 1.9.10 → 1.9.11
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +1 -1
- data/lib/meta_inspector/scraper.rb +2 -2
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/fixtures/wordpress_site.response +48 -0
- data/spec/metainspector_spec.rb +11 -6
- metadata +5 -4
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
= MetaInspector {<img src="
|
1
|
+
= MetaInspector {<img src="https://secure.travis-ci.org/jaimeiniesta/metainspector.png?branch=master" />}[http://travis-ci.org/jaimeiniesta/metainspector] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/jaimeiniesta/metainspector]
|
2
2
|
|
3
3
|
MetaInspector is a gem for web scraping purposes. You give it an URL, and it lets you easily get its title, links, images, charset, description, keywords, meta tags...
|
4
4
|
|
@@ -14,8 +14,8 @@ module MetaInspector
|
|
14
14
|
|
15
15
|
def initialize(url, timeout = 20)
|
16
16
|
@url = URI.parse(url).scheme.nil? ? 'http://' + url : url
|
17
|
-
@scheme = URI.parse(url).scheme
|
18
|
-
@host = URI.parse(url).host
|
17
|
+
@scheme = URI.parse(@url).scheme
|
18
|
+
@host = URI.parse(@url).host
|
19
19
|
@root_url = "#{@scheme}://#{@host}/"
|
20
20
|
@timeout = timeout
|
21
21
|
@data = Hashie::Rash.new('url' => @url)
|
@@ -0,0 +1,48 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Accept-Ranges:bytes
|
3
|
+
Connection:Keep-Alive
|
4
|
+
Content-Encoding:gzip
|
5
|
+
Content-Length:2621
|
6
|
+
Content-Type:text/html; charset=UTF-8
|
7
|
+
Date:Thu, 08 Nov 2012 20:31:28 GMT
|
8
|
+
Keep-Alive:timeout=5, max=100
|
9
|
+
Last-Modified:Thu, 08 Nov 2012 19:19:07 GMT
|
10
|
+
Server:Apache/2.2.22 (Unix) mod_ssl/2.2.22 OpenSSL/0.9.8e-fips-rhel5 DAV/2 mod_auth_passthrough/2.1 mod_bwlimited/1.4 FrontPage/5.0.2.2635
|
11
|
+
Vary:Accept-Encoding,Cookie
|
12
|
+
X-Pingback:http://www.inkthemes.com/xmlrpc.php
|
13
|
+
X-Powered-By:W3 Total Cache/0.9.2.4
|
14
|
+
|
15
|
+
<!DOCTYPE html>
|
16
|
+
<html dir="ltr" lang="en-US">
|
17
|
+
<head>
|
18
|
+
<meta charset="UTF-8" />
|
19
|
+
<title>Colorway Theme Previews | InkThemes</title>
|
20
|
+
<link rel="profile" href="http://gmpg.org/xfn/11" />
|
21
|
+
<link rel="pingback" href="http://www.inkthemes.com/xmlrpc.php" />
|
22
|
+
<link rel="stylesheet" type="text/css" media="all" href="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/style.css" />
|
23
|
+
<link rel="alternate" type="application/rss+xml" title="InkThemes » Feed" href="http://www.inkthemes.com/feed/" />
|
24
|
+
<link rel="alternate" type="application/rss+xml" title="InkThemes » Comments Feed" href="http://www.inkthemes.com/comments/feed/" />
|
25
|
+
<link rel="alternate" type="application/rss+xml" title="InkThemes » Colorway Theme Previews Comments Feed" href="http://www.inkthemes.com/colorway-theme-previews/feed/" />
|
26
|
+
<link rel='stylesheet' id='hotspot-css-css' href='http://www.inkthemes.com/wp-content/plugins/hotspot-map/css/hotspot.css?ver=1.0' type='text/css' media='' />
|
27
|
+
<link rel='stylesheet' id='ListItStyles-css' href='http://www.inkthemes.com/wp-content/plugins/wp-listit/listit-style.css?ver=3.4.2' type='text/css' media='all' />
|
28
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/js/jquery-1.6.1.min.js?ver=1.7.1'></script>
|
29
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/js/ddsmoothmenu.js?ver=3.4.2'></script>
|
30
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/js/slides.min.jquery.js?ver=3.4.2'></script>
|
31
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/js/jquery.colorbox.js?ver=3.4.2'></script>
|
32
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/js/custom.js?ver=3.4.2'></script>
|
33
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-includes/js/comment-reply.js?ver=3.4.2'></script><link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://www.inkthemes.com/xmlrpc.php?rsd" /><link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://www.inkthemes.com/wp-includes/wlwmanifest.xml" /><link rel='prev' title='Elite Pro WordPress Theme' href='http://www.inkthemes.com/wp-themes/elite-pro-wordpress-theme/' /><link rel='next' title='BizWay Theme Preview' href='http://www.inkthemes.com/bizway-theme-preview/' />
|
34
|
+
<meta name="generator" content="WordPress 3.4.2" />
|
35
|
+
<link rel="canonical" href="http://www.inkthemes.com/colorway-theme-previews/" />
|
36
|
+
<script type="text/javascript">function woopraReady(tracker){}</script>
|
37
|
+
<script type="text/javascript">(function(){var wsc=document.createElement('script');wsc.type='text/javascript';wsc.src=document.location.protocol+'//static.woopra.com/js/woopra.js';wsc.async=true;var ssc=document.getElementsByTagName('script')[0];ssc.parentNode.insertBefore(wsc,ssc);})();</script>
|
38
|
+
<link rel="shortcut icon" href="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/02/favicon.ico"/> <style type="text/css">.sidebar .block_wrap
|
39
|
+
.block_content{font-size:13px;line-height:22px}.sidebar .block_wrap
|
40
|
+
.quote_name{width:295px}.gform_wrapper
|
41
|
+
ul.gform_fields{margin:-15px}h1.homeheading{font-size:18px;font-weight:normal}.pricing_wrapper .c_pricing .member_price
|
42
|
+
.m_top{height:200px}.content_wrap .post
|
43
|
+
img{padding:4px;margin-top:2px;margin-bottom:2px;-webkit-box-shadow:0 0 2px 2px #C2B9B9;box-shadow:0 0 2px 2px #C2B9B9}</style><!--[if gte IE 9]><script type="text/javascript">Cufon.set('engine','canvas');</script><![endif]--><script type="text/javascript">var hide_awf_Form=true;</script>
|
44
|
+
</head>
|
45
|
+
<body class="page page-id-3326 page-template page-template-template-theme-preview-php" style="background:url()"><div id="main_header"><div class="container_24"><div class="grid_24 header_wrapper"><div class="header"><div class="grid_6 alpha"><div class="logo_wrap"><div class="logo"><a href="http://www.inkthemes.com"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/images/logo.png" alt="InkThemes" /></a></div></div></div><div class="grid_18 omega"><div class="top_banner"><div class="menu_wrapper"><div id="menu"><ul id="menu-main-menu" class="ddsmoothmenu"><li class="page_item page-item-15"><a href="http://www.inkthemes.com/wp-themes/">Browse Themes</a></li><li class="page_item page-item-1766"><a href="http://www.inkthemes.com/pricing/">Pricing</a></li><li class="page_item page-item-1792"><a href="http://www.inkthemes.com/features/">Features</a></li><li class="page_item page-item-295"><a href="http://www.inkthemes.com/support/">Support</a></li><li class="page_item page-item-677"><a href="http://www.inkthemes.com/contact-us/">Contact Us</a></li><li class="page_item page-item-2318"><a href="http://www.inkthemes.com/blog/">Blog</a></li><li><a href="http://inkthemes.com/members/member/index">Login</a></li></ul></div></div></div></div><div class="clear"></div></div></div></div></div><div class="clear"></div><div class="top_line"></div><div class="clear"></div><div class="feature_wrapper page"><div class="container_24"><div class="grid_24"><h1 class="page_title"> Colorway Theme Previews (Multiple Examples)</h1></div></div></div><div class="clear"></div><div class="bottom_line"></div><div class="clear"></div><div class="container_24"><div class="grid_24 content_wrapper"><div class="fullwidth theme_preview"><ul class="thumbnails"><li><a href="http://inkthemes.com/wpthemes/colorwayfurniture/" target="_blank"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/06/colorwaypreview11.jpg"/></a><p>Colorway for 
Hotels and Restaurant Niche</p></li><li><a href="http://inkthemes.com/wpthemes/colorwaypets/" target="_blank"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/06/colorwaypreview21.jpg"/></a><p>Colorway for Pets & Animals Niche</p></li><li><a href="http://inkthemes.com/wpthemes/colorwayngo/" target="_blank"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/06/colorwaypreview31.jpg"/></a><p>Colorway for Non Profits Niche</p></li><li><a href="http://inkthemes.com/wpthemes/colorwaysports/" target="_blank"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/06/colorwaypreview41.jpg"/></a><p>Colorway for Adventure & Sports Niche</p></li><li><a href="http://inkthemes.com/wpthemes/colorwayrestaurant/" target="_blank"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/06/colorwaypreview51.jpg"/></a><p>Colorway for Food & Catering Niche</p></li></ul></div></div></div><div class="clear"></div><div class="footer_wrapper"><div class="container_24"><div class="grid_24 footer"><div class="grid_7 footer_widget alpha"> <a href="#"><div class="theme_logo"></div> </a></div><div class="grid_6 footer_widget"><div class="buyers widget"><h6>BUYERS</h6><ul class="buy-ers"><li><a target="_blank" href="http://www.inkthemes.com/support/faq/">FAQ</a></li><li><a target="_blank" href="http://www.inkthemes.com/affiliates/">Join Affiliate</a></li><li><a target="_blank" href="http://www.inkthemes.com/wp-themes/">Browse Themes</a></li><li><a target="_blank" href="http://www.inkthemes.com/features/">Our Features</a></li><li><a target="_blank" href="http://www.inkthemes.com/community/">Support Forum</a></li><li><a target="_blank" href="http://www.inkthemes.com/support/terms-and-conditions/">Terms & Conditions</a></li></ul></div></div><div class="grid_6 footer_widget"><div class="joinus widget"><h6>JOIN US</h6><ul class="social"><li class="twitter"><a target="_blank" 
href="https://twitter.com/inkthemes">Follow us on Twitter</a></li><li class="facebook"><a target="_blank" href="http://www.facebook.com/InkThemes">Be a fan on Facebook</a></li></ul> <br/><h6>Links</h6><p><a href="http://www.inkthemes.com/">Premium Wordpress Themes</a></p></div></div><div class="grid_5 omega footer_widget"><div class="footer_paypal widget"> <img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/images/paypal.png"/></div></div></div></div></div><div class="clear"></div><div class="footer_bottom"><div class="container_24"><div class="grid_24"><div class="copyright"><p>2012 © InkThemes. All rights reserved.</p></div></div></div></div><script type="text/javascript">var _gaq=_gaq||[];_gaq.push(['_setAccount','UA-24189791-1']);_gaq.push(['_trackPageview']);(function(){var ga=document.createElement('script');ga.type='text/javascript';ga.async=true;ga.src=('https:'==document.location.protocol?'https://ssl':'http://www')+'.google-analytics.com/ga.js';var s=document.getElementsByTagName('script')[0];s.parentNode.insertBefore(ga,s);})();</script> <script type="text/javascript">function woopraReady(tracker){tracker.setDomain('inkthemes.com');tracker.setIdleTimeout(300000);tracker.track();return false;};</script>
|
46
|
+
</body>
|
47
|
+
<script>(function($,undefined){$(document).ready(function(){$('#hotspot-707').hotspot({'show_on':"mouseover",});$('#hotspot-1470').hotspot({'show_on':"mouseover",});});})(jQuery);</script><script type='text/javascript' src='http://www.inkthemes.com/wp-content/plugins/hotspot-map/js/hotspot.js?ver=1.0'></script>
|
48
|
+
</html>
|
data/spec/metainspector_spec.rb
CHANGED
@@ -4,6 +4,7 @@ require File.join(File.dirname(__FILE__), "/spec_helper")
|
|
4
4
|
|
5
5
|
describe MetaInspector do
|
6
6
|
FakeWeb.register_uri(:get, "http://pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
7
|
+
FakeWeb.register_uri(:get, "pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
7
8
|
FakeWeb.register_uri(:get, "http://www.alazan.com", :response => fixture_file("alazan.com.response"))
|
8
9
|
FakeWeb.register_uri(:get, "http://alazan.com/websolution.asp", :response => fixture_file("alazan_websolution.response"))
|
9
10
|
FakeWeb.register_uri(:get, "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/", :response => fixture_file("theonion.com.response"))
|
@@ -22,6 +23,7 @@ describe MetaInspector do
|
|
22
23
|
FakeWeb.register_uri(:get, "http://charset000.com", :response => fixture_file("charset_000.response"))
|
23
24
|
FakeWeb.register_uri(:get, "http://charset001.com", :response => fixture_file("charset_001.response"))
|
24
25
|
FakeWeb.register_uri(:get, "http://charset002.com", :response => fixture_file("charset_002.response"))
|
26
|
+
FakeWeb.register_uri(:get, "http://www.inkthemes.com/", :response => fixture_file("wordpress_site.response"))
|
25
27
|
|
26
28
|
describe 'Initialization' do
|
27
29
|
it 'should accept an URL with a scheme' do
|
@@ -37,16 +39,19 @@ describe MetaInspector do
|
|
37
39
|
it "should store the scheme" do
|
38
40
|
MetaInspector.new('http://pagerankalert.com').scheme.should == 'http'
|
39
41
|
MetaInspector.new('https://pagerankalert.com').scheme.should == 'https'
|
42
|
+
MetaInspector.new('pagerankalert.com').scheme.should == 'http'
|
40
43
|
end
|
41
44
|
|
42
45
|
it "should store the host" do
|
43
46
|
MetaInspector.new('http://pagerankalert.com').host.should == 'pagerankalert.com'
|
44
47
|
MetaInspector.new('https://pagerankalert.com').host.should == 'pagerankalert.com'
|
48
|
+
MetaInspector.new('pagerankalert.com').host.should == 'pagerankalert.com'
|
45
49
|
end
|
46
50
|
|
47
51
|
it "should store the root url" do
|
48
52
|
MetaInspector.new('http://pagerankalert.com').root_url.should == 'http://pagerankalert.com/'
|
49
53
|
MetaInspector.new('https://pagerankalert.com').root_url.should == 'https://pagerankalert.com/'
|
54
|
+
MetaInspector.new('pagerankalert.com').root_url.should == 'http://pagerankalert.com/'
|
50
55
|
end
|
51
56
|
end
|
52
57
|
|
@@ -256,15 +261,15 @@ describe MetaInspector do
|
|
256
261
|
@m.meta_Csrf_pAram.should == "authenticity_token"
|
257
262
|
end
|
258
263
|
|
259
|
-
it "should get the generator meta tag" do
|
260
|
-
pending "mocks"
|
261
|
-
@m.meta_generator.should == 'WordPress 2.8.4'
|
262
|
-
end
|
263
|
-
|
264
264
|
it "should return nil for nonfound meta_tags" do
|
265
265
|
@m.meta_lollypop.should == nil
|
266
266
|
end
|
267
267
|
|
268
|
+
it "should get the generator meta tag" do
|
269
|
+
@m = MetaInspector.new('http://www.inkthemes.com/')
|
270
|
+
@m.meta_generator.should == 'WordPress 3.4.2'
|
271
|
+
end
|
272
|
+
|
268
273
|
it "should find a meta_og_title" do
|
269
274
|
@m = MetaInspector.new('http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
|
270
275
|
@m.meta_og_title.should == "Apple Claims New iPhone Only Visible To Most Loyal Of Customers"
|
@@ -332,7 +337,7 @@ describe MetaInspector do
|
|
332
337
|
|
333
338
|
describe "parsed?" do
|
334
339
|
it "should return true if we have a parsed document" do
|
335
|
-
good = MetaInspector.new('
|
340
|
+
good = MetaInspector.new('http://pagerankalert.com')
|
336
341
|
title = good.title
|
337
342
|
|
338
343
|
good.parsed?.should == true
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 37
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 9
|
9
|
-
- 10
|
10
|
-
version: 1.9.10
|
9
|
+
- 11
|
10
|
+
version: 1.9.11
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jaime Iniesta
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-09
|
18
|
+
date: 2012-11-09 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
@@ -154,6 +154,7 @@ files:
|
|
154
154
|
- spec/fixtures/theonion.com.response
|
155
155
|
- spec/fixtures/twitter_w3clove.response
|
156
156
|
- spec/fixtures/w3clove_faqs.response
|
157
|
+
- spec/fixtures/wordpress_site.response
|
157
158
|
- spec/fixtures/youtube.response
|
158
159
|
- spec/metainspector_spec.rb
|
159
160
|
- spec/spec_helper.rb
|