unsitemap 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/sitemap.rb +33 -0
  2. metadata +77 -0
data/lib/sitemap.rb ADDED
@@ -0,0 +1,33 @@
1
+ require "open-uri"
2
+ require "nokogiri"
3
+ require "zlib"
4
+
5
# Streams the page URLs listed in an XML sitemap.
#
# Both document kinds of the sitemaps.org protocol are understood: a
# <urlset> yields its <loc> entries directly, while a <sitemapindex> is
# followed recursively, so the caller always receives page URLs.
#
# @example
#   Sitemap.new("http://example.com/sitemap.xml.gz").each { |url| puts url }
#   Sitemap.new("http://example.com/sitemap.xml").first(10)
class Sitemap
  include Enumerable

  # XML namespace of the sitemaps.org 0.9 schema.
  SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9".freeze

  # @param uri [String, URI] location of the sitemap; a location whose
  #   string form ends in ".gz" is gunzipped before parsing
  def initialize(uri)
    @uri = uri
  end

  # Yields every page URL in the sitemap, descending into nested sitemaps
  # when the document is a sitemap index.
  #
  # @yieldparam url [String] text of a <loc> element, with the surrounding
  #   whitespace left by pretty-printed XML removed
  # @return [Enumerator] when called without a block
  # @raise [RuntimeError] if the document has no root element or its root is
  #   neither <urlset> nor <sitemapindex>
  def each(&block)
    # Nothing is fetched until the enumerator is actually consumed.
    return to_enum(:each) unless block_given?

    document = load_document
    root = document.root

    # `root` is nil for an empty or non-XML response; route that to the
    # "unknown format" error instead of a NoMethodError on nil.
    case root && root.name
    when "urlset"
      document.xpath("/s:urlset/s:url/s:loc", namespaces).each do |element|
        yield element.text.strip
      end
    when "sitemapindex"
      document.xpath("/s:sitemapindex/s:sitemap/s:loc", namespaces).each do |element|
        Sitemap.new(element.text.strip).each(&block)
      end
    else
      raise "Unknown sitemap format"
    end
  end

  # Prefix-to-namespace mapping used by the XPath queries.
  #
  # @return [Hash{String => String}]
  def namespaces
    { "s" => SITEMAP_NAMESPACE }
  end

  private

  # Fetches and parses the document at @uri.
  #
  # @return [Nokogiri::XML::Document]
  def load_document
    open_source do |io|
      if gzipped?
        gz = Zlib::GzipReader.new(io)
        begin
          Nokogiri::XML(gz)
        ensure
          # Release the zlib stream; `io` itself is closed by the block form
          # of open.
          gz.finish
        end
      else
        Nokogiri::XML(io)
      end
    end
  end

  # Opens @uri and passes the IO to the block, returning the block's value.
  def open_source(&block)
    # open-uri no longer patches Kernel#open for URLs on Ruby 3.0+; URI.open
    # is the supported entry point wherever it exists (Ruby 2.5+). Older
    # rubies fall back to the patched Kernel#open.
    return URI.open(@uri, &block) if URI.respond_to?(:open)

    open(@uri, &block)
  end

  # True when the location names a gzip-compressed sitemap. Works for both
  # String and URI locations.
  def gzipped?
    @uri.to_s.end_with?(".gz")
  end
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: unsitemap
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Sam Goldman
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description:
47
+ email:
48
+ executables: []
49
+ extensions: []
50
+ extra_rdoc_files: []
51
+ files:
52
+ - lib/sitemap.rb
53
+ homepage: https://github.com/samwgoldman/unsitemap
54
+ licenses: []
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ requirements: []
72
+ rubyforge_project:
73
+ rubygems_version: 1.8.24
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: Sitemap Consumer
77
+ test_files: []