sitemap_reader 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/sitemap_reader.rb +22 -6
  3. metadata +16 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 30568b1c862fc5439bf0734f084bcc259ac333fa
4
- data.tar.gz: d94a700a7066fb98451c1e9b79b97b14fd958c71
3
+ metadata.gz: b49c90d95acec5cc0830fdf516552a3888030e8d
4
+ data.tar.gz: c7e3b150a40b4322b8f739ad0cee04f684b15edf
5
5
  SHA512:
6
- metadata.gz: e95e71e627293bc2d97645e3f9ac0336cad354ddf61d74c9a9f5e5af8075ebace47b4f050a9f86f53f10fff634d0e7efdea90f0d900ab83a0b99a081ff0ae4ea
7
- data.tar.gz: d0fa3bfeefd221c1cbbe29be4d473f71042e55edab16fc73d68a4f6326272444922afade133ef9683a442546556fd92d9872c209acfffef68ac4918e3c891959
6
+ metadata.gz: cb3a08d901a1b4ba21b8c583cc649ce1a79db95032911bbd61a0a10acf580303df3c330fbbee8b36f35360f5c5a2b63e68564ed6d0171187a96d9db1e4bb15fb
7
+ data.tar.gz: 621ba9f8eafcb3d25681a1b97421368dad3530bf389183fd066f6c033c7b2cf1ebb4a4cea51dc0145e7288d4164e345ab743900b63476f70f77dc9525fe06072
data/lib/sitemap_reader.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'nokogiri'
2
+ require 'w3c_datetime'
2
3
 
3
4
  # Parse sitemap
4
5
  #
@@ -15,19 +16,19 @@ class SitemapReader
15
16
  # file_or_url: (String)
16
17
  def initialize(file_or_url)
17
18
  @doc = Nokogiri::XML(get_sitemap(file_or_url))
18
- @urls= get_urls
19
19
  end
20
20
 
21
21
  def get_urls
22
22
  @doc.css('url').map do |u|
23
- loc = u.css('loc').first.content
24
- lastmod = u.css('lastmod').first.content unless u.css('lastmod').first.nil?
25
- {loc: loc, lastmod: lastmod}
23
+ {
24
+ loc: u.css('loc').first.content,
25
+ lastmod: url_lastmod(u.css('lastmod').first),
26
+ changefreq: url_changefreq(u.css('changefreq').first),
27
+ priority: url_priority(u.css('priority').first)
28
+ }
26
29
  end
27
30
  end
28
31
 
29
- private
30
-
31
32
  def get_sitemap(file_or_url)
32
33
  if File.exist?(file_or_url)
33
34
  File.open(file_or_url)
@@ -36,4 +37,19 @@ class SitemapReader
36
37
  open(file_or_url)
37
38
  end
38
39
  end
40
+
41
+ def url_changefreq(changefreq)
42
+ changefreq.content unless changefreq.nil?
43
+ end
44
+
45
+ def url_priority(priority)
46
+ priority.content.to_f unless priority.nil?
47
+ end
48
+
49
+ def url_lastmod(lastmod)
50
+ begin
51
+ W3cDatetime::parse(lastmod.content) unless lastmod.nil?
52
+ rescue ArgumentError
53
+ end
54
+ end
39
55
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitemap_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michal Pawlowski
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-08-19 00:00:00.000000000 Z
11
+ date: 2013-09-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - '>='
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: w3c_datetime
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement