sitemap_treemaker 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +22 -0
- data/README.md +44 -0
- data/lib/sitemap_treemaker.rb +36 -0
- data/lib/sitemap_treemaker/version.rb +3 -0
- metadata +128 -0
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2011, Miles Z. Sterrett
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# Sitemap Treemaker
|
2
|
+
... 'cause all the clever names are taken, probably.
|
3
|
+
|
4
|
+
**Problem**: I've got a basic sitemap.xml. It looks like this:
|
5
|
+
|
6
|
+
```xml
|
7
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
8
|
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
9
|
+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
10
|
+
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
|
11
|
+
http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
|
12
|
+
|
13
|
+
<url><loc>http://someplace.org/</loc><changefreq>always</changefreq><priority>0.5</priority></url>
|
14
|
+
<url><loc>http://someplace.org/about-someplace/</loc><changefreq>always</changefreq><priority>0.5</priority></url>
|
15
|
+
<url><loc>http://someplace.org/about-someplace/board-of-directors/</loc><changefreq>always</changefreq><priority>0.5</priority></url>
|
16
|
+
</urlset>
|
17
|
+
```
|
18
|
+
|
19
|
+
I need to build a tree out of all of these clearly hierarchical links. So, I do this:
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
|
23
|
+
sitemap_tree = SitemapTreemaker.new('http://someplace.org/sitemap.xml')
|
24
|
+
sitemap_tree.tree
|
25
|
+
```
|
26
|
+
|
27
|
+
`tree` returns a `Tree::TreeNode` from the library [RubyTree](http://rubytree.rubyforge.org/). I can now take that tree and do something useful with it, maybe. Or, I could just print a pretty diagram:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
sitemap_tree = SitemapTreemaker.new('http://someplace.org/sitemap.xml')
|
31
|
+
sitemap_tree.print_tree_diagram
|
32
|
+
|
33
|
+
#=>
|
34
|
+
* root
|
35
|
+
|---+ about-someplace
|
36
|
+
| |---> board-of-directors
```
|
37
|
+
|
38
|
+
## TODO
|
39
|
+
|
40
|
+
_???_
|
41
|
+
|
42
|
+
I'm sure there are other things that will need to happen in this gem. As I use it more, I'm sure it'll get fleshed out. If you've got ideas, send them to me by writing them with a thick black marker on the label on a Dogfish Head 90 Minute IPA and mailing it to me or something. If you're also from Indiana, or one of the other states that Dogfish Head no longer serves... I don't know. Email me, or whatever.
|
43
|
+
|
44
|
+
Also, pull requests are welcome. Prepare for poor response times, though.
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'tree'
|
5
|
+
|
6
|
+
# Downloads a sitemap.xml and arranges the path segments of its <loc> URLs
# into a Tree::TreeNode hierarchy rooted at a node named 'root'.
class SitemapTreemaker
  # sitemap_url - the URL given to the constructor
  # xml         - the raw HTTP response body fetched from sitemap_url
  # urls        - the <loc> values found in xml (populated by #tree)
  attr_accessor :sitemap_url, :xml, :urls

  # Fetches the sitemap over HTTP and stores the response body in #xml.
  #
  # @param sitemap_url [String] location of the sitemap.xml to download
  def initialize(sitemap_url)
    self.sitemap_url = sitemap_url
    self.xml = Net::HTTP.get_response(URI.parse(sitemap_url)).body
  end

  # Parses #xml and builds the tree of path segments. The result is memoized,
  # so later calls return the same node object.
  #
  # @return [Tree::TreeNode] root node, named 'root', whose content is sitemap_url
  def tree
    return @tree if @tree

    # Strip each <loc> value: pretty-printed sitemaps may pad the URL with
    # whitespace, which URI.parse rejects.
    self.urls = Nokogiri::XML(xml).css('url loc').map { |node| node.text.strip }

    # Build into a local and memoize only once complete, so an exception
    # part-way through never leaves a half-built tree cached in @tree.
    root = Tree::TreeNode.new('root', sitemap_url)
    urls.each { |url| graft(root, path_segments(url)) }
    @tree = root
  end

  # Prints the tree via Tree::TreeNode#print_tree, building it first if needed.
  def print_tree_diagram
    tree.print_tree
  end

  private

  # Splits the path of +url+ into its non-blank segments.
  def path_segments(url)
    path = URI.parse(url.gsub(/\s\//, '/')).path.to_s
    path.split('/').reject { |segment| segment.strip.empty? }
  end

  # Walks +segments+ down from +root+, reusing a child that already has the
  # segment's name and adding a new node (name and content both the segment)
  # otherwise.
  def graft(root, segments)
    segments.reduce(root) do |branch, segment|
      branch[segment] || branch.add(Tree::TreeNode.new(segment, segment))
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sitemap_treemaker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.1.0
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Miles Z. Sterrett
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-06-27 00:00:00 -04:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rake
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
type: :development
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rocco
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: "0"
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: *id002
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: rr
|
40
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
type: :development
|
47
|
+
prerelease: false
|
48
|
+
version_requirements: *id003
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: turn
|
51
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
type: :development
|
58
|
+
prerelease: false
|
59
|
+
version_requirements: *id004
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: nokogiri
|
62
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: "0"
|
68
|
+
type: :runtime
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: *id005
|
71
|
+
- !ruby/object:Gem::Dependency
|
72
|
+
name: rubytree
|
73
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: "0"
|
79
|
+
type: :runtime
|
80
|
+
prerelease: false
|
81
|
+
version_requirements: *id006
|
82
|
+
description: So, you've got a sitemap.xml, and you want a hierarchal representation of the URLs within, right? Well, here's your huckleberry.
|
83
|
+
email:
|
84
|
+
- miles@mileszs.com
|
85
|
+
executables: []
|
86
|
+
|
87
|
+
extensions: []
|
88
|
+
|
89
|
+
extra_rdoc_files: []
|
90
|
+
|
91
|
+
files:
|
92
|
+
- lib/sitemap_treemaker/version.rb
|
93
|
+
- lib/sitemap_treemaker.rb
|
94
|
+
- LICENSE
|
95
|
+
- README.md
|
96
|
+
has_rdoc: true
|
97
|
+
homepage: http://github.com/mileszs/sitemap_treemaker
|
98
|
+
licenses: []
|
99
|
+
|
100
|
+
post_install_message:
|
101
|
+
rdoc_options: []
|
102
|
+
|
103
|
+
require_paths:
|
104
|
+
- lib
|
105
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
106
|
+
none: false
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
hash: 2517387609628199753
|
111
|
+
segments:
|
112
|
+
- 0
|
113
|
+
version: "0"
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
|
+
none: false
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: 1.3.6
|
120
|
+
requirements: []
|
121
|
+
|
122
|
+
rubyforge_project:
|
123
|
+
rubygems_version: 1.6.2
|
124
|
+
signing_key:
|
125
|
+
specification_version: 3
|
126
|
+
summary: Grow a tree from a sitemap. It's eco-friendly despite the XML garbage!
|
127
|
+
test_files: []
|
128
|
+
|