sitemap_reader 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/sitemap_reader.rb +39 -0
  3. metadata +86 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 30568b1c862fc5439bf0734f084bcc259ac333fa
4
+ data.tar.gz: d94a700a7066fb98451c1e9b79b97b14fd958c71
5
+ SHA512:
6
+ metadata.gz: e95e71e627293bc2d97645e3f9ac0336cad354ddf61d74c9a9f5e5af8075ebace47b4f050a9f86f53f10fff634d0e7efdea90f0d900ab83a0b99a081ff0ae4ea
7
+ data.tar.gz: d0fa3bfeefd221c1cbbe29be4d473f71042e55edab16fc73d68a4f6326272444922afade133ef9683a442546556fd92d9872c209acfffef68ac4918e3c891959
@@ -0,0 +1,39 @@
1
+ require 'nokogiri'
2
+
3
+ # Parse sitemap
4
+ #
5
+ # Example:
6
+ # >> sm = SitemapReader.new('http://example.com/sitemap.xml').get_urls
7
+ # => [{:loc=>"http://example.com/page1", :lastmod=>"2013-08-18"},{:loc=>"http://example.com/page2", :lastmod=>nil}]
8
+ #
9
+ # ... or read from file like this:
10
+ # >> sm = SitemapReader.new('./sitemap.xml').get_urls
11
+ # => [{:loc=>"http://example.com/page1", :lastmod=>"2013-08-18"},{:loc=>"http://example.com/page2", :lastmod=>nil}]
12
class SitemapReader

  # Loads a sitemap from a local file or a URL and parses it as XML.
  #
  # Arguments:
  #   file_or_url: (String) path of an existing file, or a URL to fetch
  def initialize(file_or_url)
    @doc = Nokogiri::XML(get_sitemap(file_or_url))
    # Extract the entries once up front, so a <url> element that has no
    # <loc> child fails here rather than on a later call.
    @urls = get_urls
  end

  # Builds one Hash per <url> element of the parsed document.
  #
  # Returns:
  #   (Array) of {loc: String, lastmod: String or nil}; :lastmod is nil
  #   when the entry has no <lastmod> child. A new Array is built on
  #   every call.
  def get_urls
    @doc.css('url').map do |entry|
      lastmod = entry.at_css('lastmod')
      { loc: entry.at_css('loc').content, lastmod: lastmod && lastmod.content }
    end
  end

  private

  # Reads the raw sitemap content.
  #
  # Arguments:
  #   file_or_url: (String) path of an existing file, or a URL
  #
  # Returns:
  #   (String) the file content or the response body
  #
  # Raises Errno::ENOENT when the argument is neither an existing path nor
  # a URL that open-uri can open.
  def get_sitemap(file_or_url)
    # Read the bytes and close the handle right away instead of handing an
    # open File to the caller.
    return File.binread(file_or_url) if File.exist?(file_or_url)

    require 'open-uri'
    # Kernel#open is deliberately avoided: it runs a subprocess for strings
    # starting with "|", and it does not open URLs on Ruby 3.0+.
    uri = begin
      URI.parse(file_or_url)
    rescue URI::InvalidURIError
      nil
    end
    # Only URI classes extended by open-uri have a public #open.
    raise Errno::ENOENT, file_or_url unless uri.respond_to?(:open)

    uri.open(&:read)
  end
end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sitemap_reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Michal Pawlowski
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-08-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description:
56
+ email: misza@misza.co.uk
57
+ executables: []
58
+ extensions: []
59
+ extra_rdoc_files: []
60
+ files:
61
+ - lib/sitemap_reader.rb
62
+ homepage: https://github.com/itsudo/sitemap_reader
63
+ licenses:
64
+ - MIT
65
+ metadata: {}
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubyforge_project:
82
+ rubygems_version: 2.0.3
83
+ signing_key:
84
+ specification_version: 4
85
+ summary: Ruby library for reading sitemaps
86
+ test_files: []