sitemap-parser 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/sitemap-parser.rb +18 -9
  3. metadata +18 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d03dd8db51f6b4d2f7fc07f1bf82a8208e09879e
4
- data.tar.gz: 36e811e0184838fdb0332fc5914b5ef232f6ee87
3
+ metadata.gz: 1315feae0e6664ba1047dca9682663ab2854c9d9
4
+ data.tar.gz: 345c43faad3c00fce3bec650da8e0b1b9bc7903a
5
5
  SHA512:
6
- metadata.gz: 49c136b2f86b00dffbf9c565a7284ea1349a68f752dae6e1cab96b8135ab69955091f9fbb2fbd2d32880d3bc5cde7b8a29bf8eca42c2e954f09a4f4a306d7e76
7
- data.tar.gz: d5241537a66a454a6097a6c88bc3e1e643535c0a085d98f935c13aea902c5b5bf094042fbc2a140c437d548e1e9e9864dda05f21a42113452ce811499b7bd7cc
6
+ metadata.gz: b9656163ef064c3789b062deb5bff162676a426aa022a3bafe7926c4cb513207f2360920df1a81f5279cb352e6a0709a707d38cefa094ff6c038ba6a7714d15b
7
+ data.tar.gz: c8b3f196785154496d6cfa6966b0930ccd9afe115b665e962bc41042cd53de8430f03924055e4c01dd493ef0d46542075c8b636d9b9c6f989d49e5c117504da5
data/lib/sitemap-parser.rb CHANGED
@@ -5,7 +5,7 @@ class SitemapParser
5
5
 
6
6
  def initialize(url, opts = {})
7
7
  @url = url
8
- @options = {:followlocation => true}.merge(opts)
8
+ @options = {:followlocation => true, :recurse => false}.merge(opts)
9
9
  end
10
10
 
11
11
  def raw_sitemap
@@ -16,7 +16,7 @@ class SitemapParser
16
16
  if response.success?
17
17
  return response.body
18
18
  else
19
- return nil
19
+ raise "HTTP request to #{@url} failed"
20
20
  end
21
21
  end
22
22
  request.run
@@ -28,19 +28,28 @@ class SitemapParser
28
28
 
29
29
  def sitemap
30
30
  @sitemap ||= Nokogiri::XML(raw_sitemap)
31
- rescue
32
- nil
33
31
  end
34
32
 
35
33
  def urls
36
- sitemap.at("urlset").search("url")
37
- rescue
38
- nil
34
+ if sitemap.at('urlset')
35
+ sitemap.at("urlset").search("url")
36
+ elsif sitemap.at('sitemapindex')
37
+ found_urls = []
38
+ if @options[:recurse]
39
+ sitemap.at('sitemapindex').search('sitemap').each do |sitemap|
40
+ child_sitemap_location = sitemap.at('loc').content
41
+ found_urls << self.class.new(child_sitemap_location, :recurse => false).urls
42
+ end
43
+ end
44
+ return found_urls.flatten
45
+ else
46
+ raise 'Malformed sitemap, no urlset'
47
+ end
39
48
  end
40
49
 
41
50
  def to_a
42
51
  urls.map { |url| url.at("loc").content }
43
- rescue
44
- []
52
+ rescue NoMethodError
53
+ raise 'Malformed sitemap, url without loc'
45
54
  end
46
55
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitemap-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-05 00:00:00.000000000 Z
11
+ date: 2016-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '4.7'
97
+ - !ruby/object:Gem::Dependency
98
+ name: test-unit
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3.1'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '3.1'
97
111
  description: Ruby Gem to parse sitemaps.org compliant sitemaps.
98
112
  email: ben.balter@github.com
99
113
  executables: []
@@ -121,8 +135,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
121
135
  version: '0'
122
136
  requirements: []
123
137
  rubyforge_project:
124
- rubygems_version: 2.2.0
138
+ rubygems_version: 2.6.2
125
139
  signing_key:
126
140
  specification_version: 4
127
141
  summary: Ruby Gem to parse sitemaps.org compliant sitemaps
128
142
  test_files: []
143
+ has_rdoc: