sitemap_treemaker 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2011, Miles Z. Sterrett
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,44 @@
1
+ # Sitemap Treemaker
2
+ ... 'cause all the clever names are taken, probably.
3
+
4
+ **Problem**: I've got a basic sitemap.xml. It looks like this:
5
+
6
+ ```xml
7
+ <?xml version="1.0" encoding="UTF-8"?>
8
+ <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
9
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
10
+ xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
11
+ http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
12
+
13
+ <url><loc>http://someplace.org/</loc><changefreq>always</changefreq><priority>0.5</priority></url>
14
+ <url><loc>http://someplace.org/about-someplace/</loc><changefreq>always</changefreq><priority>0.5</priority></url>
15
+ <url><loc>http://someplace.org/about-someplace/board-of-directors/</loc><changefreq>always</changefreq><priority>0.5</priority></url>
16
+ </urlset>
17
+ ```
18
+
19
+ I need to build a tree out of all of these clearly hierarchical links. So, I do this:
20
+
21
+ ```ruby
22
+
23
+ sitemap_tree = SitemapTreemaker.new('http://someplace.org/sitemap.xml')
24
+ sitemap_tree.tree
25
+ ```
26
+
27
+ `tree` returns a `Tree::TreeNode` from the library [RubyTree](http://rubytree.rubyforge.org/). I can now take that tree and do something useful with it, maybe. Or, I could just print a pretty diagram:
28
+
29
+ ```ruby
30
+ sitemap_tree = SitemapTreemaker.new('http://someplace.org/sitemap.xml')
31
+ sitemap_tree.print_tree_diagram
32
+
33
+ #=>
34
+ * root
35
+ |---+ about-someplace
36
+ | |---> board-of-directors
37
+ ```
38
+ ## TODO
39
+
40
+ _???_
41
+
42
+ I'm sure there are other things that will need to happen in this gem. As I use it more, I'm sure it'll get fleshed out. If you've got ideas, send them to me by writing them with a thick black marker on the label on a Dogfish Head 90 Minute IPA and mailing it to me or something. If you're also from Indiana, or one of the other states that Dogfish Head no longer serves... I don't know. Email me, or whatever.
43
+
44
+ Also, pull requests are welcomed. Prepare for poor response times, though.
@@ -0,0 +1,36 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'nokogiri'
4
+ require 'tree'
5
+
6
# Builds a tree of URL path segments from a remote sitemap.xml.
#
# The sitemap is downloaded once, when the object is constructed. The tree is
# built lazily on the first call to #tree (or #print_tree_diagram) and is
# memoized afterwards.
#
#   sitemap_tree = SitemapTreemaker.new('http://someplace.org/sitemap.xml')
#   sitemap_tree.tree                 # => Tree::TreeNode named 'root'
#   sitemap_tree.print_tree_diagram
class SitemapTreemaker
  # sitemap_url - the URL handed to the constructor.
  # xml         - the raw response body fetched from sitemap_url.
  # urls        - the <loc> values extracted from xml; nil until the tree
  #               has been built.
  attr_accessor :sitemap_url, :xml, :urls

  # Fetches the sitemap document and stores its body in #xml.
  #
  # sitemap_url - String URL of the sitemap.xml to download.
  #
  # The response status is not inspected; whatever body the server returns is
  # stored as is. Raises URI::InvalidURIError if sitemap_url cannot be parsed,
  # plus whatever Net::HTTP raises on a network failure.
  def initialize(sitemap_url)
    self.sitemap_url = sitemap_url
    self.xml = Net::HTTP.get_response(URI.parse(sitemap_url)).body
  end

  # Returns the root Tree::TreeNode (named 'root', content = sitemap_url).
  # Every distinct path segment found in the sitemap's URLs becomes a node
  # nested under its parent segment. The result is memoized.
  def tree
    @tree ||= build_tree
  end

  # Prints a diagram of the tree using Tree::TreeNode#print_tree.
  #
  # Goes through #tree rather than @tree so it also works when the tree has
  # not been built yet.
  def print_tree_diagram
    tree.print_tree
  end

  private

  # Parses #xml, records the URLs in #urls and returns a freshly built root
  # node. The root is only handed back once fully built, so a URL that fails
  # to parse never leaves a half-built tree memoized in @tree.
  def build_tree
    # <loc> text is stripped because surrounding whitespace or newlines would
    # make URI.parse raise.
    self.urls = Nokogiri::XML(xml).css('url loc').map { |node| node.text.strip }

    root = Tree::TreeNode.new('root', sitemap_url)
    urls.each do |url|
      branch = root
      path_segments(url).each do |segment|
        # Reuse the existing child for this segment, or create it.
        branch = branch[segment] || branch.add(Tree::TreeNode.new(segment, segment))
      end
    end
    root
  end

  # Splits the path of +url+ into its non-blank segments, e.g.
  # "http://someplace.org/a/b/" => ["a", "b"].
  def path_segments(url)
    # Whitespace directly in front of a slash is dropped before parsing.
    path = URI.parse(url.gsub(/\s\//, '/')).path.to_s
    path.split('/').reject { |segment| segment.strip.empty? }
  end
end
@@ -0,0 +1,3 @@
1
# Gem version constant.
#
# Declared with `class` rather than `module` because lib/sitemap_treemaker.rb
# defines SitemapTreemaker as a class; Ruby raises TypeError when the same
# constant is opened as both a module and a class in one process.
class SitemapTreemaker
  VERSION = "0.1.0"
end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sitemap_treemaker
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.1.0
6
+ platform: ruby
7
+ authors:
8
+ - Miles Z. Sterrett
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-06-27 00:00:00 -04:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: rake
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :development
25
+ prerelease: false
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: rocco
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: rr
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ type: :development
47
+ prerelease: false
48
+ version_requirements: *id003
49
+ - !ruby/object:Gem::Dependency
50
+ name: turn
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ type: :development
58
+ prerelease: false
59
+ version_requirements: *id004
60
+ - !ruby/object:Gem::Dependency
61
+ name: nokogiri
62
+ requirement: &id005 !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: *id005
71
+ - !ruby/object:Gem::Dependency
72
+ name: rubytree
73
+ requirement: &id006 !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: "0"
79
+ type: :runtime
80
+ prerelease: false
81
+ version_requirements: *id006
82
+ description: So, you've got a sitemap.xml, and you want a hierarchal representation of the URLs within, right? Well, here's your huckleberry.
83
+ email:
84
+ - miles@mileszs.com
85
+ executables: []
86
+
87
+ extensions: []
88
+
89
+ extra_rdoc_files: []
90
+
91
+ files:
92
+ - lib/sitemap_treemaker/version.rb
93
+ - lib/sitemap_treemaker.rb
94
+ - LICENSE
95
+ - README.md
96
+ has_rdoc: true
97
+ homepage: http://github.com/mileszs/sitemap_treemaker
98
+ licenses: []
99
+
100
+ post_install_message:
101
+ rdoc_options: []
102
+
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ hash: 2517387609628199753
111
+ segments:
112
+ - 0
113
+ version: "0"
114
+ required_rubygems_version: !ruby/object:Gem::Requirement
115
+ none: false
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: 1.3.6
120
+ requirements: []
121
+
122
+ rubyforge_project:
123
+ rubygems_version: 1.6.2
124
+ signing_key:
125
+ specification_version: 3
126
+ summary: Grow a tree from a sitemap. It's eco-friendly despite the XML garbage!
127
+ test_files: []
128
+