sitemap_reader 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sitemap_reader.rb +22 -6
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b49c90d95acec5cc0830fdf516552a3888030e8d
|
4
|
+
data.tar.gz: c7e3b150a40b4322b8f739ad0cee04f684b15edf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cb3a08d901a1b4ba21b8c583cc649ce1a79db95032911bbd61a0a10acf580303df3c330fbbee8b36f35360f5c5a2b63e68564ed6d0171187a96d9db1e4bb15fb
|
7
|
+
data.tar.gz: 621ba9f8eafcb3d25681a1b97421368dad3530bf389183fd066f6c033c7b2cf1ebb4a4cea51dc0145e7288d4164e345ab743900b63476f70f77dc9525fe06072
|
data/lib/sitemap_reader.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
+
require 'w3c_datetime'
|
2
3
|
|
3
4
|
# Parse sitemap
|
4
5
|
#
|
@@ -15,19 +16,19 @@ class SitemapReader
|
|
15
16
|
# file_or_url: (String)
|
16
17
|
def initialize(file_or_url)
|
17
18
|
@doc = Nokogiri::XML(get_sitemap(file_or_url))
|
18
|
-
@urls= get_urls
|
19
19
|
end
|
20
20
|
|
21
21
|
def get_urls
|
22
22
|
@doc.css('url').map do |u|
|
23
|
-
|
24
|
-
|
25
|
-
|
23
|
+
{
|
24
|
+
loc: u.css('loc').first.content,
|
25
|
+
lastmod: url_lastmod(u.css('lastmod').first),
|
26
|
+
changefreq: url_changefreq(u.css('changefreq').first),
|
27
|
+
priority: url_priority(u.css('priority').first)
|
28
|
+
}
|
26
29
|
end
|
27
30
|
end
|
28
31
|
|
29
|
-
private
|
30
|
-
|
31
32
|
def get_sitemap(file_or_url)
|
32
33
|
if File.exist?(file_or_url)
|
33
34
|
File.open(file_or_url)
|
@@ -36,4 +37,19 @@ class SitemapReader
|
|
36
37
|
open(file_or_url)
|
37
38
|
end
|
38
39
|
end
|
40
|
+
|
41
|
+
def url_changefreq(changefreq)
|
42
|
+
changefreq.content unless changefreq.nil?
|
43
|
+
end
|
44
|
+
|
45
|
+
def url_priority(priority)
|
46
|
+
priority.content.to_f unless priority.nil?
|
47
|
+
end
|
48
|
+
|
49
|
+
def url_lastmod(lastmod)
|
50
|
+
begin
|
51
|
+
W3cDatetime::parse(lastmod.content) unless lastmod.nil?
|
52
|
+
rescue ArgumentError
|
53
|
+
end
|
54
|
+
end
|
39
55
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michal Pawlowski
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-09-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - '>='
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: w3c_datetime
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|