static_generator 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ .bundle
+ Gemfile.lock
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,6 @@
+ source "http://rubygems.org"
+
+ # Specify your gem's dependencies in static_generator.gemspec
+ gemspec
+
+
data/README.textile ADDED
@@ -0,0 +1,32 @@
+ h1. Why did I create Static Generator?
+
+ Because I wanted a simple way to grab the HTML from a site generated with my Ruby on Rails CMS.
+ The cool thing about StaticGenerator compared to other solutions (like wget) is that when I have a URL like /home/subpage, instead
+ of having this file structure:
+
+ pre. /home
+        /subpage.html
+
+ I have this file structure:
+
+ pre. /home
+        /subpage
+          /index.html
+
+ so all the links in the generated site stay the same.
+
+ h2. Usage
+
+ pre. require 'static_generator'
+ crawler = StaticGenerator::Crawler.new({
+   :url => 'http://mysite.com/',
+   :destination_path => File.expand_path('some/destination/directory'),
+   :url_prefix => 'http://mysite.com/'
+ })
+ crawler.crawl!
+
+ h2. Author
+
+ Julien Desrosiers
+
+
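The path mapping the README describes comes down to a few lines of Ruby. The following sketch is illustrative only, not part of the packaged source; the URL and destination values are made up:

    require 'fileutils'

    url_prefix = 'http://mysite.com/'                # must end with '/'
    url        = 'http://mysite.com/home/subpage'    # a crawled page
    dest       = File.expand_path('some/destination/directory')

    short_path = url[url_prefix.length..-1]          # => "home/subpage"
    dir        = File.join(dest, *short_path.split('/'))
    FileUtils.mkdir_p(dir)                           # creates .../home/subpage
    File.open(File.join(dir, 'index.html'), 'w') { |f| f.write('<html>...</html>') }

Served statically, the page is now reachable at /home/subpage/, so the site's internal links keep working without rewriting.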
data/Rakefile ADDED
@@ -0,0 +1,22 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
+
+ require 'rspec/core/rake_task'
+ RSpec::Core::RakeTask.new(:spec)
+ task :default => :spec
+
+ namespace(:spec) do
+   desc "Run all specs on multiple ruby versions (requires rvm)"
+   task(:portability) do
+     %w[1.8.6 1.8.7 1.9.2].each do |version|
+       system <<-BASH
+         bash -c 'source ~/.rvm/scripts/rvm;
+         rvm #{version};
+         echo "--------- version #{version} ----------\n";
+         bundle install;
+         rake spec'
+       BASH
+     end
+   end
+ end
+
data/lib/static_generator.rb ADDED
@@ -0,0 +1,76 @@
+ require 'anemone'
+ require 'fileutils'
+
+ module StaticGenerator
+
+   class WrongURLPrefixError < ArgumentError
+   end
+
+   class DestinationPathDoesNotExist < ArgumentError
+   end
+
+   class DestinationPathNotWritableError < ArgumentError
+   end
+
+   class Page
+
+     attr_reader :crawled_page
+
+     def initialize(crawled_page, url_prefix)
+       @crawled_page = crawled_page # from anemone
+       @url_prefix = url_prefix
+     end
+
+     def short_path
+       @crawled_page.url.to_s[@url_prefix.length, @crawled_page.url.to_s.length]
+     end
+
+     def body
+       @crawled_page.body
+     end
+   end
+
+   class Crawler
+
+     attr_reader :pages, :destination_path, :url_prefix
+
+     def initialize(opts)
+       @destination_path = opts[:destination_path]
+       @url_prefix = opts[:url_prefix]
+       @url = opts[:url]
+       if @url_prefix.nil?
+         raise WrongURLPrefixError, "Expected an `url_prefix` option for the given URL."
+       elsif @url !~ /^#{@url_prefix}/
+         raise WrongURLPrefixError, "Expected the `url_prefix` option to exist in the given URL."
+       elsif @url_prefix[-1, 1] != '/'
+         raise WrongURLPrefixError, "Expected the `url_prefix` to end with a '/'."
+       end
+
+       if ! File.directory? @destination_path
+         raise DestinationPathDoesNotExist
+       elsif ! File.writable? @destination_path
+         raise DestinationPathNotWritableError
+       end
+     end
+
+     def crawl!
+       @pages = []
+       Anemone.crawl(@url) do |anemone|
+         anemone.on_every_page do |page|
+           @pages << Page.new(page, @url_prefix)
+         end
+       end
+
+       generate_folders
+     end
+
+     def generate_folders
+       @pages.each{|page|
+         directory = File.expand_path(@destination_path)+File::SEPARATOR+(page.short_path.split('/').join(File::SEPARATOR))
+         FileUtils.mkdir_p( directory )
+         File.open("#{directory}/index.html", 'w') {|f| f.write(page.body) }
+       }
+     end
+   end
+
+ end
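A note on `short_path` above: it slices with `String#[](start, length)` and deliberately passes the full URL length; Ruby clamps an over-long length to the end of the string, so no end-index arithmetic is needed. A small illustration with hypothetical values, not taken from the library:

    url        = 'http://www.example.com/root/subpage'
    url_prefix = 'http://www.example.com/'
    # start after the prefix; any length past the end just reads to the end
    url[url_prefix.length, url.length]   # => "root/subpage"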
data/lib/static_generator/version.rb ADDED
@@ -0,0 +1,3 @@
+ module StaticGenerator
+   VERSION = "0.0.3"
+ end
data/spec/crawler_spec.rb ADDED
@@ -0,0 +1,133 @@
+ require File.dirname(__FILE__) + '/spec_helper'
+ require 'ap'
+
+ module StaticGenerator
+   describe Crawler do
+     before(:each) do
+       @options = {
+         :destination_path => File.expand_path('spec/destination_directory'),
+         :url_prefix => 'http://www.example.com/'
+       }
+     end
+
+     after(:each) do
+       FileUtils.rm_rf(Dir.glob(File.expand_path('spec/destination_directory')+File::SEPARATOR+'*'))
+     end
+
+     context 'crawling' do
+
+       it 'should find a page that links to another page' do
+         pages = []
+         pages << FakePage.new('0', :links => '1')
+         pages << FakePage.new('1')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+
+         @crawler.pages.size.should == 2
+       end
+
+       it 'should find a page that links to two other pages' do
+         pages = []
+         pages << FakePage.new('0', :links => ['1','2'])
+         pages << FakePage.new('1')
+         pages << FakePage.new('2')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+
+         @crawler.pages.size.should == 3
+       end
+
+       it 'should have the right prefix for the given url' do
+         @crawler = Crawler.new(@options.merge({:url=>FakePage.new('home').url}))
+         @crawler.crawl!
+         @crawler.pages[0].short_path.should == 'home'
+
+         pages = []
+         pages << FakePage.new('0', :links => ['0/1'])
+         pages << FakePage.new('0/1')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+         @crawler.pages[1].short_path.should == '0/1'
+
+         pages = []
+         pages << FakePage.new('root', :links => ['root/subpage/subsubpage'])
+         pages << FakePage.new('root/subpage/subsubpage')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+         @crawler.pages[1].short_path.should == 'root/subpage/subsubpage'
+
+         # ensure we have a / at the end
+         lambda {
+           @crawler = Crawler.new(@options.merge({:url=>FakePage.new('root').url,:url_prefix => 'http://www.example.com'}))
+         }.should raise_error WrongURLPrefixError
+       end
+
+       it 'should follow a relative link' do
+         pages = []
+         pages << FakePage.new('home', :hrefs => ['/subpage', 'otherpage'])
+         pages << FakePage.new('subpage')
+         pages << FakePage.new('otherpage')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+         @crawler.pages[0].short_path.should == 'home'
+         @crawler.pages[1].short_path.should == 'subpage'
+         @crawler.pages[2].short_path.should == 'otherpage'
+         @crawler.pages[2].crawled_page.url.to_s.should == 'http://www.example.com/otherpage'
+       end
+     end
+
+     context 'folder' do
+       it 'should not be created if destination_path does not exist' do
+         lambda {
+           @crawler = Crawler.new(@options.merge({
+             :url=>FakePage.new('home').url,
+             :destination_path=>File.expand_path('spec/destination_directory/')+File::SEPARATOR+'folder_that_doesnt_exist'
+           }))
+         }.should raise_error DestinationPathDoesNotExist
+       end
+
+       it 'should be created' do
+         @crawler = Crawler.new(@options.merge({:url=>FakePage.new('home').url}))
+         @crawler.crawl!
+         File.exists?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'home').should == true
+       end
+
+       it 'should have an index.html file in it' do
+         @crawler = Crawler.new(@options.merge({:url=>FakePage.new('home').url}))
+         @crawler.crawl!
+         File.exists?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'home'+File::SEPARATOR+'index.html').should == true
+       end
+
+       it 'should throw an error if it is not writable' do
+         lambda {
+           @crawler = Crawler.new(@options.merge({
+             :url=>FakePage.new('home').url,
+             :destination_path=>File.expand_path('spec/non_writable/')
+           }))
+         }.should raise_error DestinationPathNotWritableError
+       end
+     end
+
+     context 'folder with a sub folder in it' do
+
+       it 'should be created' do
+         pages = []
+         pages << FakePage.new('folder', :links => ['folder/subfolder'])
+         pages << FakePage.new('folder/subfolder')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+         File.exists?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'folder'+File::SEPARATOR+'subfolder').should == true
+       end
+
+       it 'should have an index.html file in it' do
+         pages = []
+         pages << FakePage.new('folder', :links => ['folder/subfolder'])
+         pages << FakePage.new('folder/subfolder')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+         File.exists?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'folder'+File::SEPARATOR+'subfolder'+File::SEPARATOR+'index.html').should == true
+       end
+     end
+
+   end
+ end
data/spec/destination_directory/.gitkeep ADDED
File without changes
data/spec/fakeweb_helper.rb ADDED
@@ -0,0 +1,65 @@
+ begin
+   require 'fakeweb'
+ rescue LoadError
+   warn "You need the 'fakeweb' gem installed to test StaticGenerator"
+   exit
+ end
+
+ FakeWeb.allow_net_connect = false
+
+ module StaticGenerator
+   SPEC_DOMAIN = "http://www.example.com/"
+
+   class FakePage
+     attr_accessor :links
+     attr_accessor :hrefs
+     attr_accessor :body
+
+     def initialize(name = '', options = {})
+       @name = name
+       @links = [options[:links]].flatten if options.has_key?(:links)
+       @hrefs = [options[:hrefs]].flatten if options.has_key?(:hrefs)
+       @redirect = options[:redirect] if options.has_key?(:redirect)
+       @content_type = options[:content_type] || "text/html"
+       @body = options[:body]
+
+       create_body unless @body
+       add_to_fakeweb
+     end
+
+     def url
+       SPEC_DOMAIN + @name
+     end
+
+     private
+
+     def create_body
+       @body = "<html><body>"
+       @links.each{|l| @body += "<a href=\"#{SPEC_DOMAIN}#{l}\"></a>"} if @links
+       @hrefs.each{|h| @body += "<a href=\"#{h}\"></a>"} if @hrefs
+       @body += "</body></html>"
+     end
+
+     def add_to_fakeweb
+       options = {:body => @body, :content_type => @content_type, :status => [200, "OK"]}
+
+       if @redirect
+         options[:status] = [301, "Permanently Moved"]
+
+         # only prepend SPEC_DOMAIN if a relative url (without an http scheme) was specified
+         redirect_url = (@redirect =~ /http/) ? @redirect : SPEC_DOMAIN + @redirect
+         options[:location] = redirect_url
+
+         # register the page this one redirects to
+         FakeWeb.register_uri(:get, redirect_url, {:body => '',
+                                                   :content_type => @content_type,
+                                                   :status => [200, "OK"]})
+       end
+
+       FakeWeb.register_uri(:get, SPEC_DOMAIN + @name, options)
+     end
+   end
+ end
+
+ # default root
+ StaticGenerator::FakePage.new
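A brief sketch of how this helper is meant to be used in a spec, mirroring the patterns in crawler_spec.rb (the page names here are made up):

    # Instantiating FakePage registers its URL with FakeWeb, so the crawler
    # can fetch it without any real network access.
    home = StaticGenerator::FakePage.new('home', :links => ['home/about'])
    StaticGenerator::FakePage.new('home/about')

    home.url   # => "http://www.example.com/home"
    # An HTTP GET of home.url now returns the generated body, whose anchor
    # tag links to http://www.example.com/home/about.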
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,8 @@
+ require 'rubygems'
+ require File.dirname(__FILE__) + '/fakeweb_helper'
+
+ $:.unshift(File.dirname(__FILE__) + '/../lib/')
+ require 'static_generator'
+
+ SPEC_DOMAIN = 'http://www.example.com/'
+
data/static_generator.gemspec ADDED
@@ -0,0 +1,27 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path("../lib", __FILE__)
+ require "static_generator/version"
+
+ Gem::Specification.new do |s|
+   s.name        = "static_generator"
+   s.version     = StaticGenerator::VERSION
+   s.platform    = Gem::Platform::RUBY
+   s.authors     = ["Julien Desrosiers"]
+   s.email       = ["hello@juliendesrosiers.com"]
+   s.homepage    = ""
+   s.summary     = %q{Crawler that generates a file structure that can be served by Apache without any change}
+   s.description = %q{Crawl a site with 'clean URLs' and generate files and folders from it. Example: the URL /page becomes /page/index.html instead of /page.html, so you can serve it straight from Apache and all the links still work.}
+
+   s.rubyforge_project = "static_generator"
+
+   s.files         = `git ls-files`.split("\n")
+   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+   s.require_paths = ["lib"]
+
+   s.add_development_dependency "rspec", "~> 2.4.0"
+   s.add_development_dependency "fakeweb", "~> 1.3.0"
+   s.add_development_dependency "awesome_print", "~> 0.3.2"
+
+   s.add_dependency "anemone", "~> 0.5.0"
+ end
metadata ADDED
@@ -0,0 +1,144 @@
+ --- !ruby/object:Gem::Specification
+ name: static_generator
+ version: !ruby/object:Gem::Version
+   hash: 25
+   prerelease:
+   segments:
+   - 0
+   - 0
+   - 3
+   version: 0.0.3
+ platform: ruby
+ authors:
+ - Julien Desrosiers
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2011-02-10 00:00:00 -05:00
+ default_executable:
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: rspec
+   prerelease: false
+   requirement: &id001 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         hash: 31
+         segments:
+         - 2
+         - 4
+         - 0
+         version: 2.4.0
+   type: :development
+   version_requirements: *id001
+ - !ruby/object:Gem::Dependency
+   name: fakeweb
+   prerelease: false
+   requirement: &id002 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         hash: 27
+         segments:
+         - 1
+         - 3
+         - 0
+         version: 1.3.0
+   type: :development
+   version_requirements: *id002
+ - !ruby/object:Gem::Dependency
+   name: awesome_print
+   prerelease: false
+   requirement: &id003 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         hash: 23
+         segments:
+         - 0
+         - 3
+         - 2
+         version: 0.3.2
+   type: :development
+   version_requirements: *id003
+ - !ruby/object:Gem::Dependency
+   name: anemone
+   prerelease: false
+   requirement: &id004 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         hash: 11
+         segments:
+         - 0
+         - 5
+         - 0
+         version: 0.5.0
+   type: :runtime
+   version_requirements: *id004
+ description: "Crawl a site with 'clean URLs' and generate files and folders from it. Example: the URL /page becomes /page/index.html instead of /page.html, so you can serve it straight from Apache and all the links still work."
+ email:
+ - hello@juliendesrosiers.com
+ executables: []
+
+ extensions: []
+
+ extra_rdoc_files: []
+
+ files:
+ - .gitignore
+ - Gemfile
+ - README.textile
+ - Rakefile
+ - lib/static_generator.rb
+ - lib/static_generator/version.rb
+ - spec/crawler_spec.rb
+ - spec/destination_directory/.gitkeep
+ - spec/fakeweb_helper.rb
+ - spec/spec_helper.rb
+ - static_generator.gemspec
+ has_rdoc: true
+ homepage: ""
+ licenses: []
+
+ post_install_message:
+ rdoc_options: []
+
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       hash: 3
+       segments:
+       - 0
+       version: "0"
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       hash: 3
+       segments:
+       - 0
+       version: "0"
+ requirements: []
+
+ rubyforge_project: static_generator
+ rubygems_version: 1.4.1
+ signing_key:
+ specification_version: 3
+ summary: Crawler that generates a file structure that can be served by Apache without any change
+ test_files:
+ - spec/crawler_spec.rb
+ - spec/destination_directory/.gitkeep
+ - spec/fakeweb_helper.rb
+ - spec/spec_helper.rb