metainspector 1.9.10 → 1.9.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +1 -1
- data/lib/meta_inspector/scraper.rb +2 -2
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/fixtures/wordpress_site.response +48 -0
- data/spec/metainspector_spec.rb +11 -6
- metadata +5 -4
data/README.rdoc
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
= MetaInspector {<img src="
|
|
1
|
+
= MetaInspector {<img src="https://secure.travis-ci.org/jaimeiniesta/metainspector.png?branch=master" />}[http://travis-ci.org/jaimeiniesta/metainspector] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/jaimeiniesta/metainspector]
|
|
2
2
|
|
|
3
3
|
MetaInspector is a gem for web scraping purposes. You give it an URL, and it lets you easily get its title, links, images, charset, description, keywords, meta tags...
|
|
4
4
|
|
|
@@ -14,8 +14,8 @@ module MetaInspector
|
|
|
14
14
|
|
|
15
15
|
def initialize(url, timeout = 20)
|
|
16
16
|
@url = URI.parse(url).scheme.nil? ? 'http://' + url : url
|
|
17
|
-
@scheme = URI.parse(url).scheme
|
|
18
|
-
@host = URI.parse(url).host
|
|
17
|
+
@scheme = URI.parse(@url).scheme
|
|
18
|
+
@host = URI.parse(@url).host
|
|
19
19
|
@root_url = "#{@scheme}://#{@host}/"
|
|
20
20
|
@timeout = timeout
|
|
21
21
|
@data = Hashie::Rash.new('url' => @url)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
HTTP/1.1 200 OK
|
|
2
|
+
Accept-Ranges:bytes
|
|
3
|
+
Connection:Keep-Alive
|
|
4
|
+
Content-Encoding:gzip
|
|
5
|
+
Content-Length:2621
|
|
6
|
+
Content-Type:text/html; charset=UTF-8
|
|
7
|
+
Date:Thu, 08 Nov 2012 20:31:28 GMT
|
|
8
|
+
Keep-Alive:timeout=5, max=100
|
|
9
|
+
Last-Modified:Thu, 08 Nov 2012 19:19:07 GMT
|
|
10
|
+
Server:Apache/2.2.22 (Unix) mod_ssl/2.2.22 OpenSSL/0.9.8e-fips-rhel5 DAV/2 mod_auth_passthrough/2.1 mod_bwlimited/1.4 FrontPage/5.0.2.2635
|
|
11
|
+
Vary:Accept-Encoding,Cookie
|
|
12
|
+
X-Pingback:http://www.inkthemes.com/xmlrpc.php
|
|
13
|
+
X-Powered-By:W3 Total Cache/0.9.2.4
|
|
14
|
+
|
|
15
|
+
<!DOCTYPE html>
|
|
16
|
+
<html dir="ltr" lang="en-US">
|
|
17
|
+
<head>
|
|
18
|
+
<meta charset="UTF-8" />
|
|
19
|
+
<title>Colorway Theme Previews | InkThemes</title>
|
|
20
|
+
<link rel="profile" href="http://gmpg.org/xfn/11" />
|
|
21
|
+
<link rel="pingback" href="http://www.inkthemes.com/xmlrpc.php" />
|
|
22
|
+
<link rel="stylesheet" type="text/css" media="all" href="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/style.css" />
|
|
23
|
+
<link rel="alternate" type="application/rss+xml" title="InkThemes » Feed" href="http://www.inkthemes.com/feed/" />
|
|
24
|
+
<link rel="alternate" type="application/rss+xml" title="InkThemes » Comments Feed" href="http://www.inkthemes.com/comments/feed/" />
|
|
25
|
+
<link rel="alternate" type="application/rss+xml" title="InkThemes » Colorway Theme Previews Comments Feed" href="http://www.inkthemes.com/colorway-theme-previews/feed/" />
|
|
26
|
+
<link rel='stylesheet' id='hotspot-css-css' href='http://www.inkthemes.com/wp-content/plugins/hotspot-map/css/hotspot.css?ver=1.0' type='text/css' media='' />
|
|
27
|
+
<link rel='stylesheet' id='ListItStyles-css' href='http://www.inkthemes.com/wp-content/plugins/wp-listit/listit-style.css?ver=3.4.2' type='text/css' media='all' />
|
|
28
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/js/jquery-1.6.1.min.js?ver=1.7.1'></script>
|
|
29
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/js/ddsmoothmenu.js?ver=3.4.2'></script>
|
|
30
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/js/slides.min.jquery.js?ver=3.4.2'></script>
|
|
31
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/js/jquery.colorbox.js?ver=3.4.2'></script>
|
|
32
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/js/custom.js?ver=3.4.2'></script>
|
|
33
|
+
<script type='text/javascript' src='http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-includes/js/comment-reply.js?ver=3.4.2'></script><link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://www.inkthemes.com/xmlrpc.php?rsd" /><link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://www.inkthemes.com/wp-includes/wlwmanifest.xml" /><link rel='prev' title='Elite Pro WordPress Theme' href='http://www.inkthemes.com/wp-themes/elite-pro-wordpress-theme/' /><link rel='next' title='BizWay Theme Preview' href='http://www.inkthemes.com/bizway-theme-preview/' />
|
|
34
|
+
<meta name="generator" content="WordPress 3.4.2" />
|
|
35
|
+
<link rel="canonical" href="http://www.inkthemes.com/colorway-theme-previews/" />
|
|
36
|
+
<script type="text/javascript">function woopraReady(tracker){}</script>
|
|
37
|
+
<script type="text/javascript">(function(){var wsc=document.createElement('script');wsc.type='text/javascript';wsc.src=document.location.protocol+'//static.woopra.com/js/woopra.js';wsc.async=true;var ssc=document.getElementsByTagName('script')[0];ssc.parentNode.insertBefore(wsc,ssc);})();</script>
|
|
38
|
+
<link rel="shortcut icon" href="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/02/favicon.ico"/> <style type="text/css">.sidebar .block_wrap
|
|
39
|
+
.block_content{font-size:13px;line-height:22px}.sidebar .block_wrap
|
|
40
|
+
.quote_name{width:295px}.gform_wrapper
|
|
41
|
+
ul.gform_fields{margin:-15px}h1.homeheading{font-size:18px;font-weight:normal}.pricing_wrapper .c_pricing .member_price
|
|
42
|
+
.m_top{height:200px}.content_wrap .post
|
|
43
|
+
img{padding:4px;margin-top:2px;margin-bottom:2px;-webkit-box-shadow:0 0 2px 2px #C2B9B9;box-shadow:0 0 2px 2px #C2B9B9}</style><!--[if gte IE 9]><script type="text/javascript">Cufon.set('engine','canvas');</script><![endif]--><script type="text/javascript">var hide_awf_Form=true;</script>
|
|
44
|
+
</head>
|
|
45
|
+
<body class="page page-id-3326 page-template page-template-template-theme-preview-php" style="background:url()"><div id="main_header"><div class="container_24"><div class="grid_24 header_wrapper"><div class="header"><div class="grid_6 alpha"><div class="logo_wrap"><div class="logo"><a href="http://www.inkthemes.com"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/images/logo.png" alt="InkThemes" /></a></div></div></div><div class="grid_18 omega"><div class="top_banner"><div class="menu_wrapper"><div id="menu"><ul id="menu-main-menu" class="ddsmoothmenu"><li class="page_item page-item-15"><a href="http://www.inkthemes.com/wp-themes/">Browse Themes</a></li><li class="page_item page-item-1766"><a href="http://www.inkthemes.com/pricing/">Pricing</a></li><li class="page_item page-item-1792"><a href="http://www.inkthemes.com/features/">Features</a></li><li class="page_item page-item-295"><a href="http://www.inkthemes.com/support/">Support</a></li><li class="page_item page-item-677"><a href="http://www.inkthemes.com/contact-us/">Contact Us</a></li><li class="page_item page-item-2318"><a href="http://www.inkthemes.com/blog/">Blog</a></li><li><a href="http://inkthemes.com/members/member/index">Login</a></li></ul></div></div></div></div><div class="clear"></div></div></div></div></div><div class="clear"></div><div class="top_line"></div><div class="clear"></div><div class="feature_wrapper page"><div class="container_24"><div class="grid_24"><h1 class="page_title"> Colorway Theme Previews (Multiple Examples)</h1></div></div></div><div class="clear"></div><div class="bottom_line"></div><div class="clear"></div><div class="container_24"><div class="grid_24 content_wrapper"><div class="fullwidth theme_preview"><ul class="thumbnails"><li><a href="http://inkthemes.com/wpthemes/colorwayfurniture/" target="_blank"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/06/colorwaypreview11.jpg"/></a><p>Colorway for Hotels and Restaurant Niche</p></li><li><a href="http://inkthemes.com/wpthemes/colorwaypets/" target="_blank"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/06/colorwaypreview21.jpg"/></a><p>Colorway for Pets & Animals Niche</p></li><li><a href="http://inkthemes.com/wpthemes/colorwayngo/" target="_blank"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/06/colorwaypreview31.jpg"/></a><p>Colorway for Non Profits Niche</p></li><li><a href="http://inkthemes.com/wpthemes/colorwaysports/" target="_blank"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/06/colorwaypreview41.jpg"/></a><p>Colorway for Adventure & Sports Niche</p></li><li><a href="http://inkthemes.com/wpthemes/colorwayrestaurant/" target="_blank"><img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/uploads/2012/06/colorwaypreview51.jpg"/></a><p>Colorway for Food & Catering Niche</p></li></ul></div></div></div><div class="clear"></div><div class="footer_wrapper"><div class="container_24"><div class="grid_24 footer"><div class="grid_7 footer_widget alpha"> <a href="#"><div class="theme_logo"></div> </a></div><div class="grid_6 footer_widget"><div class="buyers widget"><h6>BUYERS</h6><ul class="buy-ers"><li><a target="_blank" href="http://www.inkthemes.com/support/faq/">FAQ</a></li><li><a target="_blank" href="http://www.inkthemes.com/affiliates/">Join Affiliate</a></li><li><a target="_blank" href="http://www.inkthemes.com/wp-themes/">Browse Themes</a></li><li><a target="_blank" href="http://www.inkthemes.com/features/">Our Features</a></li><li><a target="_blank" href="http://www.inkthemes.com/community/">Support Forum</a></li><li><a target="_blank" href="http://www.inkthemes.com/support/terms-and-conditions/">Terms & Conditions</a></li></ul></div></div><div class="grid_6 footer_widget"><div class="joinus widget"><h6>JOIN US</h6><ul class="social"><li class="twitter"><a target="_blank" href="https://twitter.com/inkthemes">Follow us on Twitter</a></li><li class="facebook"><a target="_blank" href="http://www.facebook.com/InkThemes">Be a fan on Facebook</a></li></ul> <br/><h6>Links</h6><p><a href="http://www.inkthemes.com/">Premium Wordpress Themes</a></p></div></div><div class="grid_5 omega footer_widget"><div class="footer_paypal widget"> <img src="http://inkthemesmainsite.inkthemes.netdna-cdn.com/wp-content/themes/inkthemes/images/paypal.png"/></div></div></div></div></div><div class="clear"></div><div class="footer_bottom"><div class="container_24"><div class="grid_24"><div class="copyright"><p>2012 © InkThemes. All rights reserved.</p></div></div></div></div><script type="text/javascript">var _gaq=_gaq||[];_gaq.push(['_setAccount','UA-24189791-1']);_gaq.push(['_trackPageview']);(function(){var ga=document.createElement('script');ga.type='text/javascript';ga.async=true;ga.src=('https:'==document.location.protocol?'https://ssl':'http://www')+'.google-analytics.com/ga.js';var s=document.getElementsByTagName('script')[0];s.parentNode.insertBefore(ga,s);})();</script> <script type="text/javascript">function woopraReady(tracker){tracker.setDomain('inkthemes.com');tracker.setIdleTimeout(300000);tracker.track();return false;};</script>
|
|
46
|
+
</body>
|
|
47
|
+
<script>(function($,undefined){$(document).ready(function(){$('#hotspot-707').hotspot({'show_on':"mouseover",});$('#hotspot-1470').hotspot({'show_on':"mouseover",});});})(jQuery);</script><script type='text/javascript' src='http://www.inkthemes.com/wp-content/plugins/hotspot-map/js/hotspot.js?ver=1.0'></script>
|
|
48
|
+
</html>
|
data/spec/metainspector_spec.rb
CHANGED
|
@@ -4,6 +4,7 @@ require File.join(File.dirname(__FILE__), "/spec_helper")
|
|
|
4
4
|
|
|
5
5
|
describe MetaInspector do
|
|
6
6
|
FakeWeb.register_uri(:get, "http://pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
|
7
|
+
FakeWeb.register_uri(:get, "pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
|
7
8
|
FakeWeb.register_uri(:get, "http://www.alazan.com", :response => fixture_file("alazan.com.response"))
|
|
8
9
|
FakeWeb.register_uri(:get, "http://alazan.com/websolution.asp", :response => fixture_file("alazan_websolution.response"))
|
|
9
10
|
FakeWeb.register_uri(:get, "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/", :response => fixture_file("theonion.com.response"))
|
|
@@ -22,6 +23,7 @@ describe MetaInspector do
|
|
|
22
23
|
FakeWeb.register_uri(:get, "http://charset000.com", :response => fixture_file("charset_000.response"))
|
|
23
24
|
FakeWeb.register_uri(:get, "http://charset001.com", :response => fixture_file("charset_001.response"))
|
|
24
25
|
FakeWeb.register_uri(:get, "http://charset002.com", :response => fixture_file("charset_002.response"))
|
|
26
|
+
FakeWeb.register_uri(:get, "http://www.inkthemes.com/", :response => fixture_file("wordpress_site.response"))
|
|
25
27
|
|
|
26
28
|
describe 'Initialization' do
|
|
27
29
|
it 'should accept an URL with a scheme' do
|
|
@@ -37,16 +39,19 @@ describe MetaInspector do
|
|
|
37
39
|
it "should store the scheme" do
|
|
38
40
|
MetaInspector.new('http://pagerankalert.com').scheme.should == 'http'
|
|
39
41
|
MetaInspector.new('https://pagerankalert.com').scheme.should == 'https'
|
|
42
|
+
MetaInspector.new('pagerankalert.com').scheme.should == 'http'
|
|
40
43
|
end
|
|
41
44
|
|
|
42
45
|
it "should store the host" do
|
|
43
46
|
MetaInspector.new('http://pagerankalert.com').host.should == 'pagerankalert.com'
|
|
44
47
|
MetaInspector.new('https://pagerankalert.com').host.should == 'pagerankalert.com'
|
|
48
|
+
MetaInspector.new('pagerankalert.com').host.should == 'pagerankalert.com'
|
|
45
49
|
end
|
|
46
50
|
|
|
47
51
|
it "should store the root url" do
|
|
48
52
|
MetaInspector.new('http://pagerankalert.com').root_url.should == 'http://pagerankalert.com/'
|
|
49
53
|
MetaInspector.new('https://pagerankalert.com').root_url.should == 'https://pagerankalert.com/'
|
|
54
|
+
MetaInspector.new('pagerankalert.com').root_url.should == 'http://pagerankalert.com/'
|
|
50
55
|
end
|
|
51
56
|
end
|
|
52
57
|
|
|
@@ -256,15 +261,15 @@ describe MetaInspector do
|
|
|
256
261
|
@m.meta_Csrf_pAram.should == "authenticity_token"
|
|
257
262
|
end
|
|
258
263
|
|
|
259
|
-
it "should get the generator meta tag" do
|
|
260
|
-
pending "mocks"
|
|
261
|
-
@m.meta_generator.should == 'WordPress 2.8.4'
|
|
262
|
-
end
|
|
263
|
-
|
|
264
264
|
it "should return nil for nonfound meta_tags" do
|
|
265
265
|
@m.meta_lollypop.should == nil
|
|
266
266
|
end
|
|
267
267
|
|
|
268
|
+
it "should get the generator meta tag" do
|
|
269
|
+
@m = MetaInspector.new('http://www.inkthemes.com/')
|
|
270
|
+
@m.meta_generator.should == 'WordPress 3.4.2'
|
|
271
|
+
end
|
|
272
|
+
|
|
268
273
|
it "should find a meta_og_title" do
|
|
269
274
|
@m = MetaInspector.new('http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
|
|
270
275
|
@m.meta_og_title.should == "Apple Claims New iPhone Only Visible To Most Loyal Of Customers"
|
|
@@ -332,7 +337,7 @@ describe MetaInspector do
|
|
|
332
337
|
|
|
333
338
|
describe "parsed?" do
|
|
334
339
|
it "should return true if we have a parsed document" do
|
|
335
|
-
good = MetaInspector.new('
|
|
340
|
+
good = MetaInspector.new('http://pagerankalert.com')
|
|
336
341
|
title = good.title
|
|
337
342
|
|
|
338
343
|
good.parsed?.should == true
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: metainspector
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
hash:
|
|
4
|
+
hash: 37
|
|
5
5
|
prerelease:
|
|
6
6
|
segments:
|
|
7
7
|
- 1
|
|
8
8
|
- 9
|
|
9
|
-
-
|
|
10
|
-
version: 1.9.
|
|
9
|
+
- 11
|
|
10
|
+
version: 1.9.11
|
|
11
11
|
platform: ruby
|
|
12
12
|
authors:
|
|
13
13
|
- Jaime Iniesta
|
|
@@ -15,7 +15,7 @@ autorequire:
|
|
|
15
15
|
bindir: bin
|
|
16
16
|
cert_chain: []
|
|
17
17
|
|
|
18
|
-
date: 2012-09
|
|
18
|
+
date: 2012-11-09 00:00:00 Z
|
|
19
19
|
dependencies:
|
|
20
20
|
- !ruby/object:Gem::Dependency
|
|
21
21
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
|
@@ -154,6 +154,7 @@ files:
|
|
|
154
154
|
- spec/fixtures/theonion.com.response
|
|
155
155
|
- spec/fixtures/twitter_w3clove.response
|
|
156
156
|
- spec/fixtures/w3clove_faqs.response
|
|
157
|
+
- spec/fixtures/wordpress_site.response
|
|
157
158
|
- spec/fixtures/youtube.response
|
|
158
159
|
- spec/metainspector_spec.rb
|
|
159
160
|
- spec/spec_helper.rb
|