w3clove 0.4.5 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/w3clove/sitemap.rb +4 -2
- data/lib/w3clove/version.rb +1 -1
- data/spec/samples/zigotica.com.html +37 -0
- data/spec/sitemap_spec.rb +10 -2
- metadata +4 -4
data/lib/w3clove/sitemap.rb
CHANGED
@@ -43,15 +43,17 @@ module W3Clove
|
|
43
43
|
private
|
44
44
|
|
45
45
|
# Scrapes the url in search of links.
|
46
|
+
#
|
46
47
|
# It first assumes it's an XML sitemap; if no locations found, it will try to
|
47
48
|
# scrape the links from HTML.
|
49
|
+
#
|
48
50
|
# For HTML sources, it will only get the links that start with the sitemap url, convert relative links
|
49
|
-
# to absolute links,
|
51
|
+
# to absolute links, remove anchors from links, and include the sitemap url
|
50
52
|
def pages_in_sitemap
|
51
53
|
pages = xml_locations.map {|loc| W3Clove::Page.new(loc.text)}
|
52
54
|
if pages.empty?
|
53
55
|
m = MetaInspector.new(url)
|
54
|
-
links = m.absolute_links.select {|l| l.start_with?(m.url)}.map {|l| l.split('#')[0]}.uniq
|
56
|
+
links = ([m.url] + m.absolute_links.select {|l| l.start_with?(m.url)}.map {|l| l.split('#')[0]}).uniq
|
55
57
|
pages = links.map {|link| W3Clove::Page.new(link)}
|
56
58
|
end
|
57
59
|
pages
|
data/lib/w3clove/version.rb
CHANGED
data/spec/samples/zigotica.com.html
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=0" />
|
6
|
+
<title>Sergi Meseguer aka zigotica, home</title>
|
7
|
+
<link rel="stylesheet" href="css/style.css">
|
8
|
+
</head>
|
9
|
+
|
10
|
+
<body>
|
11
|
+
<div id="wrapper">
|
12
|
+
<img src="img/avatar.jpg" alt="avatar">
|
13
|
+
<section>
|
14
|
+
<h1>Sergi Meseguer</h1>
|
15
|
+
<p>I am an enthusiast developer who codes front and back end solutions.</p>
|
16
|
+
<p class="summary">Follow me:</p>
|
17
|
+
|
18
|
+
<ul>
|
19
|
+
<li><a href="https://github.com/zigotica"><img src="img/github-32x32.png" alt="github"></a></li>
|
20
|
+
<li><a href="http://twitter.com/zigotica"><img src="img/twitter-32x32.png" alt="twitter"></a></li>
|
21
|
+
<li><a href="http://zigotica.tumblr.com/"><img src="img/tumblr-32x32.png" alt="tumblr"></a></li>
|
22
|
+
<li><a href="https://plus.google.com/113605874723194545353"><img src="img/gplus-32x32.png" alt="Google+"></a></li>
|
23
|
+
<li><a href="http://www.linkedin.com/in/sergimeseguer"><img src="img/linkedin-32x32.png" alt="linkedin"></a></li>
|
24
|
+
</ul>
|
25
|
+
</section>
|
26
|
+
<footer><small>Contents and image © 2011, Sergi Meseguer. All rights reserved.</small></footer>
|
27
|
+
</div>
|
28
|
+
<script type="text/javascript">
|
29
|
+
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
30
|
+
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
31
|
+
</script>
|
32
|
+
<script type="text/javascript">
|
33
|
+
var pageTracker = _gat._getTracker("UA-3717420-1");
|
34
|
+
pageTracker._trackPageview();
|
35
|
+
</script>
|
36
|
+
</body>
|
37
|
+
</html>
|
data/spec/sitemap_spec.rb
CHANGED
@@ -10,6 +10,9 @@ describe W3Clove::Sitemap do
|
|
10
10
|
@sitemap_html = W3Clove::Sitemap.new('http://guides.rubyonrails.org')
|
11
11
|
@sitemap_html.stub!(:doc).and_return(open("#{$samples_dir}/guides.rubyonrails.org.html"))
|
12
12
|
|
13
|
+
@sitemap_no_links = W3Clove::Sitemap.new('http://zigotica.com')
|
14
|
+
@sitemap_no_links.stub!(:doc).and_return(open("#{$samples_dir}/zigotica.com.html"))
|
15
|
+
|
13
16
|
MarkupValidator.any_instance.stubs(:validate_uri).returns(stubbed_validator_results)
|
14
17
|
end
|
15
18
|
|
@@ -26,8 +29,13 @@ describe W3Clove::Sitemap do
|
|
26
29
|
end
|
27
30
|
|
28
31
|
it "should get pages from the sample guides.rubyonrails.org site" do
|
29
|
-
@sitemap_html.pages.length.should == 34
|
30
|
-
@sitemap_html.pages.map {|p| p.url}.should == ["http://guides.rubyonrails.org/index.html", "http://guides.rubyonrails.org/", "http://guides.rubyonrails.org/getting_started.html", "http://guides.rubyonrails.org/migrations.html", "http://guides.rubyonrails.org/active_record_validations_callbacks.html", "http://guides.rubyonrails.org/association_basics.html", "http://guides.rubyonrails.org/active_record_querying.html", "http://guides.rubyonrails.org/layouts_and_rendering.html", "http://guides.rubyonrails.org/form_helpers.html", "http://guides.rubyonrails.org/action_controller_overview.html", "http://guides.rubyonrails.org/routing.html", "http://guides.rubyonrails.org/active_support_core_extensions.html", "http://guides.rubyonrails.org/i18n.html", "http://guides.rubyonrails.org/action_mailer_basics.html", "http://guides.rubyonrails.org/testing.html", "http://guides.rubyonrails.org/security.html", "http://guides.rubyonrails.org/debugging_rails_applications.html", "http://guides.rubyonrails.org/performance_testing.html", "http://guides.rubyonrails.org/configuring.html", "http://guides.rubyonrails.org/command_line.html", "http://guides.rubyonrails.org/caching_with_rails.html", "http://guides.rubyonrails.org/asset_pipeline.html", "http://guides.rubyonrails.org/plugins.html", "http://guides.rubyonrails.org/rails_on_rack.html", "http://guides.rubyonrails.org/generators.html", "http://guides.rubyonrails.org/contributing_to_ruby_on_rails.html", "http://guides.rubyonrails.org/api_documentation_guidelines.html", "http://guides.rubyonrails.org/ruby_on_rails_guides_guidelines.html", "http://guides.rubyonrails.org/3_1_release_notes.html", "http://guides.rubyonrails.org/3_0_release_notes.html", "http://guides.rubyonrails.org/2_3_release_notes.html", "http://guides.rubyonrails.org/2_2_release_notes.html", "http://guides.rubyonrails.org/credits.html", "http://guides.rubyonrails.org/v2.3.11/"]
|
32
|
+
@sitemap_html.pages.length.should == 35
|
33
|
+
@sitemap_html.pages.map {|p| p.url}.should == ["http://guides.rubyonrails.org", "http://guides.rubyonrails.org/index.html", "http://guides.rubyonrails.org/", "http://guides.rubyonrails.org/getting_started.html", "http://guides.rubyonrails.org/migrations.html", "http://guides.rubyonrails.org/active_record_validations_callbacks.html", "http://guides.rubyonrails.org/association_basics.html", "http://guides.rubyonrails.org/active_record_querying.html", "http://guides.rubyonrails.org/layouts_and_rendering.html", "http://guides.rubyonrails.org/form_helpers.html", "http://guides.rubyonrails.org/action_controller_overview.html", "http://guides.rubyonrails.org/routing.html", "http://guides.rubyonrails.org/active_support_core_extensions.html", "http://guides.rubyonrails.org/i18n.html", "http://guides.rubyonrails.org/action_mailer_basics.html", "http://guides.rubyonrails.org/testing.html", "http://guides.rubyonrails.org/security.html", "http://guides.rubyonrails.org/debugging_rails_applications.html", "http://guides.rubyonrails.org/performance_testing.html", "http://guides.rubyonrails.org/configuring.html", "http://guides.rubyonrails.org/command_line.html", "http://guides.rubyonrails.org/caching_with_rails.html", "http://guides.rubyonrails.org/asset_pipeline.html", "http://guides.rubyonrails.org/plugins.html", "http://guides.rubyonrails.org/rails_on_rack.html", "http://guides.rubyonrails.org/generators.html", "http://guides.rubyonrails.org/contributing_to_ruby_on_rails.html", "http://guides.rubyonrails.org/api_documentation_guidelines.html", "http://guides.rubyonrails.org/ruby_on_rails_guides_guidelines.html", "http://guides.rubyonrails.org/3_1_release_notes.html", "http://guides.rubyonrails.org/3_0_release_notes.html", "http://guides.rubyonrails.org/2_3_release_notes.html", "http://guides.rubyonrails.org/2_2_release_notes.html", "http://guides.rubyonrails.org/credits.html", "http://guides.rubyonrails.org/v2.3.11/"]
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should include sitemap url at least, even if no links were found" do
|
37
|
+
@sitemap_no_links.pages.length.should == 1
|
38
|
+
@sitemap_no_links.pages[0].url.should == 'http://zigotica.com'
|
31
39
|
end
|
32
40
|
end
|
33
41
|
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: w3clove
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 1
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 4
|
9
8
|
- 5
|
10
|
-
version: 0.4.5
|
9
|
+
version: "0.5"
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Jaime Iniesta
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date: 2011-11-
|
17
|
+
date: 2011-11-22 00:00:00 Z
|
19
18
|
dependencies:
|
20
19
|
- !ruby/object:Gem::Dependency
|
21
20
|
name: w3c_validators
|
@@ -147,6 +146,7 @@ files:
|
|
147
146
|
- spec/samples/absolute_links.html
|
148
147
|
- spec/samples/guides.rubyonrails.org.html
|
149
148
|
- spec/samples/sitemap.xml
|
149
|
+
- spec/samples/zigotica.com.html
|
150
150
|
- spec/sitemap_spec.rb
|
151
151
|
- spec/spec_helper.rb
|
152
152
|
- w3clove.gemspec
|