unsitemap 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/sitemap.rb +33 -0
  2. metadata +77 -0
data/lib/sitemap.rb ADDED
@@ -0,0 +1,33 @@
1
+ require "open-uri"
2
+ require "nokogiri"
3
+ require "zlib"
4
+
5
# Reads a sitemap, or a sitemap index, and yields every page URL it lists.
#
# A sitemap index is followed recursively: each child sitemap it references
# is fetched and its URLs are yielded through the same block.
class Sitemap
  include Enumerable

  # @param uri [String, URI] location of the sitemap; anything accepted by
  #   open-uri's URI.open. A location ending in ".gz" is gunzipped.
  def initialize(uri)
    @uri = uri
  end

  # Yields each URL listed in the sitemap as a String, with surrounding
  # whitespace removed.
  #
  # @yieldparam url [String]
  # @return [Enumerator] when called without a block
  # @raise [RuntimeError] when the document root is neither <urlset> nor
  #   <sitemapindex> (this includes empty or non-XML documents)
  def each(&block)
    # Without a block there is nothing to yield to; hand back a lazy
    # enumerator instead of fetching the document and failing afterwards.
    return enum_for(:each) unless block_given?

    sitemap = fetch_document

    # root is nil for an empty document, hence the safe navigation: such
    # input falls through to the "Unknown sitemap format" error.
    case sitemap.root&.name
    when "urlset"
      locations(sitemap, "/s:urlset/s:url/s:loc").each(&block)
    when "sitemapindex"
      locations(sitemap, "/s:sitemapindex/s:sitemap/s:loc").each do |location|
        Sitemap.new(location).each(&block)
      end
    else
      raise "Unknown sitemap format"
    end
  end

  # XML namespace prefix mapping used by the XPath queries.
  #
  # @return [Hash{String => String}]
  def namespaces
    { "s" => "http://www.sitemaps.org/schemas/sitemap/0.9" }
  end

  private

  # Fetches the sitemap and parses it into a Nokogiri XML document.
  def fetch_document
    # URI.open rather than Kernel#open: the open-uri patch of Kernel#open
    # no longer handles URLs on Ruby 3.0+.
    # NOTE(review): URI.open still falls back to Kernel#open for strings
    # that are not URLs, and child locations come from remote XML; consider
    # validating the scheme if sitemaps from untrusted hosts are consumed.
    URI.open(@uri) do |io|
      next Nokogiri::XML(io) unless gzipped?

      gzip = Zlib::GzipReader.new(io)
      begin
        Nokogiri::XML(gzip)
      ensure
        # finish releases the zlib stream but leaves io open; the
        # surrounding URI.open block closes io itself.
        gzip.finish
      end
    end
  end

  # True when the location names a gzip-compressed sitemap. to_s lets the
  # check work for URI objects as well as Strings.
  def gzipped?
    @uri.to_s.end_with?(".gz")
  end

  # Text of every node matching the XPath, stripped of the whitespace that
  # pretty-printed sitemaps put around <loc> values.
  def locations(document, path)
    document.xpath(path, namespaces).map { |element| element.text.strip }
  end
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: unsitemap
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Sam Goldman
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description:
47
+ email:
48
+ executables: []
49
+ extensions: []
50
+ extra_rdoc_files: []
51
+ files:
52
+ - lib/sitemap.rb
53
+ homepage: https://github.com/samwgoldman/unsitemap
54
+ licenses: []
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ requirements: []
72
+ rubyforge_project:
73
+ rubygems_version: 1.8.24
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: Sitemap Consumer
77
+ test_files: []