w3clove 0.4.5 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/w3clove/sitemap.rb +4 -2
- data/lib/w3clove/version.rb +1 -1
- data/spec/samples/zigotica.com.html +37 -0
- data/spec/sitemap_spec.rb +10 -2
- metadata +4 -4
data/lib/w3clove/sitemap.rb
CHANGED
@@ -43,15 +43,17 @@ module W3Clove
|
|
43
43
|
private
|
44
44
|
|
45
45
|
# Scrapes the url in search of links.
|
46
|
+
#
|
46
47
|
# It first assumes it's an XML sitemap; if no locations found, it will try to
|
47
48
|
# scrape the links from HTML.
|
49
|
+
#
|
48
50
|
# For HTML sources, it will only get the links that start with the sitemap url, convert relative links
|
49
|
-
# to absolute links,
|
51
|
+
# to absolute links, remove anchors from links, and include the sitemap url
|
50
52
|
def pages_in_sitemap
|
51
53
|
pages = xml_locations.map {|loc| W3Clove::Page.new(loc.text)}
|
52
54
|
if pages.empty?
|
53
55
|
m = MetaInspector.new(url)
|
54
|
-
links = m.absolute_links.select {|l| l.start_with?(m.url)}.map {|l| l.split('#')[0]}.uniq
|
56
|
+
links = ([m.url] + m.absolute_links.select {|l| l.start_with?(m.url)}.map {|l| l.split('#')[0]}).uniq
|
55
57
|
pages = links.map {|link| W3Clove::Page.new(link)}
|
56
58
|
end
|
57
59
|
pages
|
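data/lib/w3clove/version.rb
CHANGED
(hunk not captured in this extract; the summary above lists this file as +1 -1)
data/spec/samples/zigotica.com.html
ADDED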
@@ -0,0 +1,37 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=0" />
|
6
|
+
<title>Sergi Meseguer aka zigotica, home</title>
|
7
|
+
<link rel="stylesheet" href="css/style.css">
|
8
|
+
</head>
|
9
|
+
|
10
|
+
<body>
|
11
|
+
<div id="wrapper">
|
12
|
+
<img src="img/avatar.jpg" alt="avatar">
|
13
|
+
<section>
|
14
|
+
<h1>Sergi Meseguer</h1>
|
15
|
+
<p>I am an enthusiast developer who codes front and back end solutions.</p>
|
16
|
+
<p class="summary">Follow me:</p>
|
17
|
+
|
18
|
+
<ul>
|
19
|
+
<li><a href="https://github.com/zigotica"><img src="img/github-32x32.png" alt="github"></a></li>
|
20
|
+
<li><a href="http://twitter.com/zigotica"><img src="img/twitter-32x32.png" alt="twitter"></a></li>
|
21
|
+
<li><a href="http://zigotica.tumblr.com/"><img src="img/tumblr-32x32.png" alt="tumblr"></a></li>
|
22
|
+
<li><a href="https://plus.google.com/113605874723194545353"><img src="img/gplus-32x32.png" alt="Google+"></a></li>
|
23
|
+
<li><a href="http://www.linkedin.com/in/sergimeseguer"><img src="img/linkedin-32x32.png" alt="linkedin"></a></li>
|
24
|
+
</ul>
|
25
|
+
</section>
|
26
|
+
<footer><small>Contents and image © 2011, Sergi Meseguer. All rights reserved.</small></footer>
|
27
|
+
</div>
|
28
|
+
<script type="text/javascript">
|
29
|
+
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
30
|
+
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
31
|
+
</script>
|
32
|
+
<script type="text/javascript">
|
33
|
+
var pageTracker = _gat._getTracker("UA-3717420-1");
|
34
|
+
pageTracker._trackPageview();
|
35
|
+
</script>
|
36
|
+
</body>
|
37
|
+
</html>
|
data/spec/sitemap_spec.rb
CHANGED
@@ -10,6 +10,9 @@ describe W3Clove::Sitemap do
|
|
10
10
|
@sitemap_html = W3Clove::Sitemap.new('http://guides.rubyonrails.org')
|
11
11
|
@sitemap_html.stub!(:doc).and_return(open("#{$samples_dir}/guides.rubyonrails.org.html"))
|
12
12
|
|
13
|
+
@sitemap_no_links = W3Clove::Sitemap.new('http://zigotica.com')
|
14
|
+
@sitemap_no_links.stub!(:doc).and_return(open("#{$samples_dir}/zigotica.com.html"))
|
15
|
+
|
13
16
|
MarkupValidator.any_instance.stubs(:validate_uri).returns(stubbed_validator_results)
|
14
17
|
end
|
15
18
|
|
@@ -26,8 +29,13 @@ describe W3Clove::Sitemap do
|
|
26
29
|
end
|
27
30
|
|
28
31
|
it "should get pages from the sample guides.rubyonrails.org site" do
|
29
|
-
@sitemap_html.pages.length.should == 34
|
30
|
-
@sitemap_html.pages.map {|p| p.url}.should == ["http://guides.rubyonrails.org/index.html", "http://guides.rubyonrails.org/", "http://guides.rubyonrails.org/getting_started.html", "http://guides.rubyonrails.org/migrations.html", "http://guides.rubyonrails.org/active_record_validations_callbacks.html", "http://guides.rubyonrails.org/association_basics.html", "http://guides.rubyonrails.org/active_record_querying.html", "http://guides.rubyonrails.org/layouts_and_rendering.html", "http://guides.rubyonrails.org/form_helpers.html", "http://guides.rubyonrails.org/action_controller_overview.html", "http://guides.rubyonrails.org/routing.html", "http://guides.rubyonrails.org/active_support_core_extensions.html", "http://guides.rubyonrails.org/i18n.html", "http://guides.rubyonrails.org/action_mailer_basics.html", "http://guides.rubyonrails.org/testing.html", "http://guides.rubyonrails.org/security.html", "http://guides.rubyonrails.org/debugging_rails_applications.html", "http://guides.rubyonrails.org/performance_testing.html", "http://guides.rubyonrails.org/configuring.html", "http://guides.rubyonrails.org/command_line.html", "http://guides.rubyonrails.org/caching_with_rails.html", "http://guides.rubyonrails.org/asset_pipeline.html", "http://guides.rubyonrails.org/plugins.html", "http://guides.rubyonrails.org/rails_on_rack.html", "http://guides.rubyonrails.org/generators.html", "http://guides.rubyonrails.org/contributing_to_ruby_on_rails.html", "http://guides.rubyonrails.org/api_documentation_guidelines.html", "http://guides.rubyonrails.org/ruby_on_rails_guides_guidelines.html", "http://guides.rubyonrails.org/3_1_release_notes.html", "http://guides.rubyonrails.org/3_0_release_notes.html", "http://guides.rubyonrails.org/2_3_release_notes.html", "http://guides.rubyonrails.org/2_2_release_notes.html", "http://guides.rubyonrails.org/credits.html", "http://guides.rubyonrails.org/v2.3.11/"]
|
32
|
+
@sitemap_html.pages.length.should == 35
|
33
|
+
@sitemap_html.pages.map {|p| p.url}.should == ["http://guides.rubyonrails.org", "http://guides.rubyonrails.org/index.html", "http://guides.rubyonrails.org/", "http://guides.rubyonrails.org/getting_started.html", "http://guides.rubyonrails.org/migrations.html", "http://guides.rubyonrails.org/active_record_validations_callbacks.html", "http://guides.rubyonrails.org/association_basics.html", "http://guides.rubyonrails.org/active_record_querying.html", "http://guides.rubyonrails.org/layouts_and_rendering.html", "http://guides.rubyonrails.org/form_helpers.html", "http://guides.rubyonrails.org/action_controller_overview.html", "http://guides.rubyonrails.org/routing.html", "http://guides.rubyonrails.org/active_support_core_extensions.html", "http://guides.rubyonrails.org/i18n.html", "http://guides.rubyonrails.org/action_mailer_basics.html", "http://guides.rubyonrails.org/testing.html", "http://guides.rubyonrails.org/security.html", "http://guides.rubyonrails.org/debugging_rails_applications.html", "http://guides.rubyonrails.org/performance_testing.html", "http://guides.rubyonrails.org/configuring.html", "http://guides.rubyonrails.org/command_line.html", "http://guides.rubyonrails.org/caching_with_rails.html", "http://guides.rubyonrails.org/asset_pipeline.html", "http://guides.rubyonrails.org/plugins.html", "http://guides.rubyonrails.org/rails_on_rack.html", "http://guides.rubyonrails.org/generators.html", "http://guides.rubyonrails.org/contributing_to_ruby_on_rails.html", "http://guides.rubyonrails.org/api_documentation_guidelines.html", "http://guides.rubyonrails.org/ruby_on_rails_guides_guidelines.html", "http://guides.rubyonrails.org/3_1_release_notes.html", "http://guides.rubyonrails.org/3_0_release_notes.html", "http://guides.rubyonrails.org/2_3_release_notes.html", "http://guides.rubyonrails.org/2_2_release_notes.html", "http://guides.rubyonrails.org/credits.html", "http://guides.rubyonrails.org/v2.3.11/"]
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should include sitemap url at least, even if no links were found" do
|
37
|
+
@sitemap_no_links.pages.length.should == 1
|
38
|
+
@sitemap_no_links.pages[0].url.should == 'http://zigotica.com'
|
31
39
|
end
|
32
40
|
end
|
33
41
|
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: w3clove
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 1
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 4
|
9
8
|
- 5
|
10
|
-
version: 0.4.5
|
9
|
+
version: "0.5"
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Jaime Iniesta
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date: 2011-11-
|
17
|
+
date: 2011-11-22 00:00:00 Z
|
19
18
|
dependencies:
|
20
19
|
- !ruby/object:Gem::Dependency
|
21
20
|
name: w3c_validators
|
@@ -147,6 +146,7 @@ files:
|
|
147
146
|
- spec/samples/absolute_links.html
|
148
147
|
- spec/samples/guides.rubyonrails.org.html
|
149
148
|
- spec/samples/sitemap.xml
|
149
|
+
- spec/samples/zigotica.com.html
|
150
150
|
- spec/sitemap_spec.rb
|
151
151
|
- spec/spec_helper.rb
|
152
152
|
- w3clove.gemspec
|