sitemap-parser 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sitemap-parser.rb +18 -9
- metadata +18 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1315feae0e6664ba1047dca9682663ab2854c9d9
|
4
|
+
data.tar.gz: 345c43faad3c00fce3bec650da8e0b1b9bc7903a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b9656163ef064c3789b062deb5bff162676a426aa022a3bafe7926c4cb513207f2360920df1a81f5279cb352e6a0709a707d38cefa094ff6c038ba6a7714d15b
|
7
|
+
data.tar.gz: c8b3f196785154496d6cfa6966b0930ccd9afe115b665e962bc41042cd53de8430f03924055e4c01dd493ef0d46542075c8b636d9b9c6f989d49e5c117504da5
|
data/lib/sitemap-parser.rb
CHANGED
@@ -5,7 +5,7 @@ class SitemapParser
|
|
5
5
|
|
6
6
|
def initialize(url, opts = {})
|
7
7
|
@url = url
|
8
|
-
@options = {:followlocation => true}.merge(opts)
|
8
|
+
@options = {:followlocation => true, :recurse => false}.merge(opts)
|
9
9
|
end
|
10
10
|
|
11
11
|
def raw_sitemap
|
@@ -16,7 +16,7 @@ class SitemapParser
|
|
16
16
|
if response.success?
|
17
17
|
return response.body
|
18
18
|
else
|
19
|
-
|
19
|
+
raise "HTTP request to #{@url} failed"
|
20
20
|
end
|
21
21
|
end
|
22
22
|
request.run
|
@@ -28,19 +28,28 @@ class SitemapParser
|
|
28
28
|
|
29
29
|
def sitemap
|
30
30
|
@sitemap ||= Nokogiri::XML(raw_sitemap)
|
31
|
-
rescue
|
32
|
-
nil
|
33
31
|
end
|
34
32
|
|
35
33
|
def urls
|
36
|
-
sitemap.at(
|
37
|
-
|
38
|
-
|
34
|
+
if sitemap.at('urlset')
|
35
|
+
sitemap.at("urlset").search("url")
|
36
|
+
elsif sitemap.at('sitemapindex')
|
37
|
+
found_urls = []
|
38
|
+
if @options[:recurse]
|
39
|
+
sitemap.at('sitemapindex').search('sitemap').each do |sitemap|
|
40
|
+
child_sitemap_location = sitemap.at('loc').content
|
41
|
+
found_urls << self.class.new(child_sitemap_location, :recurse => false).urls
|
42
|
+
end
|
43
|
+
end
|
44
|
+
return found_urls.flatten
|
45
|
+
else
|
46
|
+
raise 'Malformed sitemap, no urlset'
|
47
|
+
end
|
39
48
|
end
|
40
49
|
|
41
50
|
def to_a
|
42
51
|
urls.map { |url| url.at("loc").content }
|
43
|
-
rescue
|
44
|
-
|
52
|
+
rescue NoMethodError
|
53
|
+
raise 'Malformed sitemap, url without loc'
|
45
54
|
end
|
46
55
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '4.7'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: test-unit
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '3.1'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '3.1'
|
97
111
|
description: Ruby Gem to parse sitemaps.org compliant sitemaps.
|
98
112
|
email: ben.balter@github.com
|
99
113
|
executables: []
|
@@ -121,8 +135,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
121
135
|
version: '0'
|
122
136
|
requirements: []
|
123
137
|
rubyforge_project:
|
124
|
-
rubygems_version: 2.2
|
138
|
+
rubygems_version: 2.6.2
|
125
139
|
signing_key:
|
126
140
|
specification_version: 4
|
127
141
|
summary: Ruby Gem to parse sitemaps.org compliant sitemaps
|
128
142
|
test_files: []
|
143
|
+
has_rdoc:
|