static_generator 0.0.3

data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ .bundle
+ Gemfile.lock
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,6 @@
+ source "http://rubygems.org"
+
+ # Specify your gem's dependencies in static_generator.gemspec
+ gemspec
+
+
data/README.textile ADDED
@@ -0,0 +1,32 @@
+ h1. Why did I create Static Generator?
+
+ Because I wanted a simple way to grab the HTML from a site generated with my Ruby on Rails CMS.
+ The cool thing about StaticGenerator compared to other solutions (like wget) is that when I have a URL like /home/subpage, instead
+ of having this file structure:
+
+ pre. /home
+        /subpage.html
+
+ I have this file structure:
+
+ pre. /home
+        /subpage
+          /index.html
+
+ so all the links in the generated site will stay the same.
+
+ h2. Usage
+
+ pre. require 'static_generator'
+ crawler = StaticGenerator::Crawler.new({
+   :url => 'http://mysite.com/',
+   :destination_path => File.expand_path('some/destination/directory'),
+   :url_prefix => 'http://mysite.com/'
+ })
+ crawler.crawl!
+
+ h2. Author
+
+ Julien Desrosiers
+
+
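A minimal sketch of the URL-to-path mapping the README describes, reusing the url_prefix from the usage example above (the values are illustrative, not part of the gem):

    # Strip the prefix to get the path relative to the site root,
    # then nest an index.html under it so existing links keep working.
    url_prefix = 'http://mysite.com/'
    page_url   = 'http://mysite.com/home/subpage'

    short_path  = page_url[url_prefix.length..-1]  # => "home/subpage"
    destination = File.join('some/destination/directory', short_path, 'index.html')
    # => "some/destination/directory/home/subpage/index.html"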
data/Rakefile ADDED
@@ -0,0 +1,22 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
+
+ require 'rspec/core/rake_task'
+ RSpec::Core::RakeTask.new(:spec)
+ task :default => :spec
+
+ namespace(:spec) do
+   desc "Run all specs on multiple ruby versions (requires rvm)"
+   task(:portability) do
+     %w[1.8.6 1.8.7 1.9.2].each do |version|
+       system <<-BASH
+         bash -c 'source ~/.rvm/scripts/rvm;
+         rvm #{version};
+         echo "--------- version #{version} ----------\n";
+         bundle install;
+         rake spec'
+       BASH
+     end
+   end
+ end
+
data/lib/static_generator.rb ADDED
@@ -0,0 +1,76 @@
+ require 'anemone'
+ require 'fileutils'
+
+ module StaticGenerator
+
+   class WrongURLPrefixError < ArgumentError
+   end
+
+   class DestinationPathDoesNotExist < ArgumentError
+   end
+
+   class DestinationPathNotWritableError < ArgumentError
+   end
+
+   class Page
+
+     attr_reader :crawled_page
+
+     def initialize(crawled_page, url_prefix)
+       @crawled_page = crawled_page # the page object Anemone yields
+       @url_prefix = url_prefix
+     end
+
+     # The page URL relative to the url_prefix, e.g. "home/subpage".
+     def short_path
+       @crawled_page.url.to_s[@url_prefix.length, @crawled_page.url.to_s.length]
+     end
+
+     def body
+       @crawled_page.body
+     end
+   end
+
+   class Crawler
+
+     attr_reader :pages, :destination_path, :url_prefix
+
+     def initialize(opts)
+       @destination_path = opts[:destination_path]
+       @url_prefix = opts[:url_prefix]
+       @url = opts[:url]
+       if @url_prefix.nil?
+         raise WrongURLPrefixError, "Expected a `url_prefix` option for the given URL."
+       elsif @url !~ /^#{@url_prefix}/
+         raise WrongURLPrefixError, "Expected the `url_prefix` option to be a prefix of the given URL."
+       elsif @url_prefix[-1, 1] != '/'
+         raise WrongURLPrefixError, "Expected the `url_prefix` to end with a '/'."
+       end
+
+       if ! File.directory? @destination_path
+         raise DestinationPathDoesNotExist
+       elsif ! File.writable? @destination_path
+         raise DestinationPathNotWritableError
+       end
+     end
+
+     def crawl!
+       @pages = []
+       Anemone.crawl(@url) do |anemone|
+         anemone.on_every_page do |page|
+           @pages << Page.new(page, @url_prefix)
+         end
+       end
+
+       generate_folders
+     end
+
+     # Write each crawled page to <destination_path>/<short_path>/index.html.
+     def generate_folders
+       @pages.each{|page|
+         directory = File.expand_path(@destination_path)+File::SEPARATOR+(page.short_path.split('/').join(File::SEPARATOR))
+         FileUtils.mkdir_p( directory )
+         File.open("#{directory}/index.html", 'w') {|f| f.write(page.body) }
+       }
+     end
+   end
+
+ end
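For reference, a quick way to exercise Page#short_path in isolation, with a stand-in for the page object Anemone normally supplies (FakeCrawledPage is a hypothetical struct, not part of the gem):

    require 'uri'
    require 'static_generator'

    # Stand-in for the object Anemone yields to on_every_page;
    # it only needs to respond to #url and #body.
    FakeCrawledPage = Struct.new(:url, :body)

    page = StaticGenerator::Page.new(
      FakeCrawledPage.new(URI('http://mysite.com/home/subpage'), '<html></html>'),
      'http://mysite.com/'
    )
    page.short_path # => "home/subpage"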
data/lib/static_generator/version.rb ADDED
@@ -0,0 +1,3 @@
+ module StaticGenerator
+   VERSION = "0.0.3"
+ end
data/spec/crawler_spec.rb ADDED
@@ -0,0 +1,133 @@
+ require File.dirname(__FILE__) + '/spec_helper'
+ require 'ap'
+
+ module StaticGenerator
+   describe Crawler do
+     before(:each) do
+       @options = {
+         :destination_path => File.expand_path('spec/destination_directory'),
+         :url_prefix => 'http://www.example.com/'
+       }
+     end
+
+     after(:each) do
+       FileUtils.rm_rf(Dir.glob(File.expand_path('spec/destination_directory')+File::SEPARATOR+'*'))
+     end
+
+     context 'crawling' do
+
+       it 'should find a page that links to another page' do
+         pages = []
+         pages << FakePage.new('0', :links => '1')
+         pages << FakePage.new('1')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+
+         @crawler.pages.size.should == 2
+       end
+
+       it 'should find a page that links to two other pages' do
+         pages = []
+         pages << FakePage.new('0', :links => ['1','2'])
+         pages << FakePage.new('1')
+         pages << FakePage.new('2')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+
+         @crawler.pages.size.should == 3
+       end
+
+       it 'should have the right prefix for the given url' do
+         @crawler = Crawler.new(@options.merge({:url=>FakePage.new('home').url}))
+         @crawler.crawl!
+         @crawler.pages[0].short_path.should == 'home'
+
+         pages = []
+         pages << FakePage.new('0', :links => ['0/1'])
+         pages << FakePage.new('0/1')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+         @crawler.pages[1].short_path.should == '0/1'
+
+         pages = []
+         pages << FakePage.new('root', :links => ['root/subpage/subsubpage'])
+         pages << FakePage.new('root/subpage/subsubpage')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+         @crawler.pages[1].short_path.should == 'root/subpage/subsubpage'
+
+         # ensure the url_prefix ends with a /
+         lambda {
+           @crawler = Crawler.new(@options.merge({:url=>FakePage.new('root').url, :url_prefix => 'http://www.example.com'}))
+         }.should raise_error WrongURLPrefixError
+       end
+
+       it 'should follow a relative link' do
+         pages = []
+         pages << FakePage.new('home', :hrefs => ['/subpage', 'otherpage'])
+         pages << FakePage.new('subpage')
+         pages << FakePage.new('otherpage')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+         @crawler.pages[0].short_path.should == 'home'
+         @crawler.pages[1].short_path.should == 'subpage'
+         @crawler.pages[2].short_path.should == 'otherpage'
+         @crawler.pages[2].crawled_page.url.to_s.should == 'http://www.example.com/otherpage'
+       end
+     end
+
+     context 'folder' do
+       it 'should not be created if destination_path does not exist' do
+         lambda {
+           @crawler = Crawler.new(@options.merge({
+             :url=>FakePage.new('home').url,
+             :destination_path=>File.expand_path('spec/destination_directory/')+File::SEPARATOR+'folder_that_doesnt_exist'
+           }))
+         }.should raise_error DestinationPathDoesNotExist
+       end
+
+       it 'should be created' do
+         @crawler = Crawler.new(@options.merge({:url=>FakePage.new('home').url}))
+         @crawler.crawl!
+         File.exists?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'home').should == true
+       end
+
+       it 'should have an index.html file in it' do
+         @crawler = Crawler.new(@options.merge({:url=>FakePage.new('home').url}))
+         @crawler.crawl!
+         File.exists?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'home'+File::SEPARATOR+'index.html').should == true
+       end
+
+       it 'should throw an error if it is not writable' do
+         lambda {
+           @crawler = Crawler.new(@options.merge({
+             :url=>FakePage.new('home').url,
+             :destination_path=>File.expand_path('spec/non_writable/')
+           }))
+         }.should raise_error DestinationPathNotWritableError
+       end
+     end
+
+     context 'folder with a sub folder in it' do
+
+       it 'should be created' do
+         pages = []
+         pages << FakePage.new('folder', :links => ['folder/subfolder'])
+         pages << FakePage.new('folder/subfolder')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+         File.exists?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'folder'+File::SEPARATOR+'subfolder').should == true
+       end
+
+       it 'should have an index.html file in it' do
+         pages = []
+         pages << FakePage.new('folder', :links => ['folder/subfolder'])
+         pages << FakePage.new('folder/subfolder')
+         @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
+         @crawler.crawl!
+         File.exists?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'folder'+File::SEPARATOR+'subfolder'+File::SEPARATOR+'index.html').should == true
+       end
+     end
+
+   end
+ end
data/spec/destination_directory/.gitkeep ADDED
File without changes
data/spec/fakeweb_helper.rb ADDED
@@ -0,0 +1,65 @@
+ begin
+   require 'fakeweb'
+ rescue LoadError
+   warn "You need the 'fakeweb' gem installed to test StaticGenerator"
+   exit
+ end
+
+ FakeWeb.allow_net_connect = false
+
+ module StaticGenerator
+   SPEC_DOMAIN = "http://www.example.com/"
+
+   class FakePage
+     attr_accessor :links
+     attr_accessor :hrefs
+     attr_accessor :body
+
+     def initialize(name = '', options = {})
+       @name = name
+       @links = [options[:links]].flatten if options.has_key?(:links)
+       @hrefs = [options[:hrefs]].flatten if options.has_key?(:hrefs)
+       @redirect = options[:redirect] if options.has_key?(:redirect)
+       @content_type = options[:content_type] || "text/html"
+       @body = options[:body]
+
+       create_body unless @body
+       add_to_fakeweb
+     end
+
+     def url
+       SPEC_DOMAIN + @name
+     end
+
+     private
+
+     def create_body
+       @body = "<html><body>"
+       @links.each{|l| @body += "<a href=\"#{SPEC_DOMAIN}#{l}\"></a>"} if @links
+       @hrefs.each{|h| @body += "<a href=\"#{h}\"></a>"} if @hrefs
+       @body += "</body></html>"
+     end
+
+     def add_to_fakeweb
+       options = {:body => @body, :content_type => @content_type, :status => [200, "OK"]}
+
+       if @redirect
+         options[:status] = [301, "Moved Permanently"]
+
+         # only prepend SPEC_DOMAIN if a relative url (without an http scheme) was specified
+         redirect_url = (@redirect =~ /http/) ? @redirect : SPEC_DOMAIN + @redirect
+         options[:location] = redirect_url
+
+         # register the page this one redirects to
+         FakeWeb.register_uri(:get, redirect_url, {:body => '',
+                                                   :content_type => @content_type,
+                                                   :status => [200, "OK"]})
+       end
+
+       FakeWeb.register_uri(:get, SPEC_DOMAIN + @name, options)
+     end
+   end
+ end
+
+ # default root page
+ StaticGenerator::FakePage.new
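To illustrate how the specs use this helper: each FakePage registers itself with FakeWeb on creation, so the crawler can fetch a whole fake site without network access (the page names below are illustrative):

    # A root page linking to one subpage; both become FakeWeb stubs
    # the moment they are constructed.
    root = StaticGenerator::FakePage.new('home', :links => ['home/about'])
    StaticGenerator::FakePage.new('home/about')

    root.url # => "http://www.example.com/home"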
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,8 @@
+ require 'rubygems'
+ require File.dirname(__FILE__) + '/fakeweb_helper'
+
+ $:.unshift(File.dirname(__FILE__) + '/../lib/')
+ require 'static_generator'
+
+ SPEC_DOMAIN = 'http://www.example.com/'
+
data/static_generator.gemspec ADDED
@@ -0,0 +1,27 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path("../lib", __FILE__)
+ require "static_generator/version"
+
+ Gem::Specification.new do |s|
+   s.name        = "static_generator"
+   s.version     = StaticGenerator::VERSION
+   s.platform    = Gem::Platform::RUBY
+   s.authors     = ["Julien Desrosiers"]
+   s.email       = ["hello@juliendesrosiers.com"]
+   s.homepage    = ""
+   s.summary     = %q{Crawler that generates a file structure that can be served by Apache without any change}
+   s.description = %q{Crawl a site with 'clean URLs' and generate files and folders from it. Example: the URL /page becomes /page/index.html instead of /page.html, so you can serve it straight from Apache and all the links keep working.}
+
+   s.rubyforge_project = "static_generator"
+
+   s.files         = `git ls-files`.split("\n")
+   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+   s.require_paths = ["lib"]
+
+   s.add_development_dependency "rspec", "~> 2.4.0"
+   s.add_development_dependency "fakeweb", "~> 1.3.0"
+   s.add_development_dependency "awesome_print", "~> 0.3.2"
+
+   s.add_dependency "anemone", "~> 0.5.0"
+ end
metadata ADDED
@@ -0,0 +1,144 @@
+ --- !ruby/object:Gem::Specification
+ name: static_generator
+ version: !ruby/object:Gem::Version
+   hash: 25
+   prerelease:
+   segments:
+   - 0
+   - 0
+   - 3
+   version: 0.0.3
+ platform: ruby
+ authors:
+ - Julien Desrosiers
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2011-02-10 00:00:00 -05:00
+ default_executable:
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: rspec
+   prerelease: false
+   requirement: &id001 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         hash: 31
+         segments:
+         - 2
+         - 4
+         - 0
+         version: 2.4.0
+   type: :development
+   version_requirements: *id001
+ - !ruby/object:Gem::Dependency
+   name: fakeweb
+   prerelease: false
+   requirement: &id002 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         hash: 27
+         segments:
+         - 1
+         - 3
+         - 0
+         version: 1.3.0
+   type: :development
+   version_requirements: *id002
+ - !ruby/object:Gem::Dependency
+   name: awesome_print
+   prerelease: false
+   requirement: &id003 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         hash: 23
+         segments:
+         - 0
+         - 3
+         - 2
+         version: 0.3.2
+   type: :development
+   version_requirements: *id003
+ - !ruby/object:Gem::Dependency
+   name: anemone
+   prerelease: false
+   requirement: &id004 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         hash: 11
+         segments:
+         - 0
+         - 5
+         - 0
+         version: 0.5.0
+   type: :runtime
+   version_requirements: *id004
+ description: "Crawl a site with 'clean URLs' and generate files and folders from it. Example: the URL /page becomes /page/index.html instead of /page.html, so you can serve it straight from Apache and all the links keep working."
+ email:
+ - hello@juliendesrosiers.com
+ executables: []
+
+ extensions: []
+
+ extra_rdoc_files: []
+
+ files:
+ - .gitignore
+ - Gemfile
+ - README.textile
+ - Rakefile
+ - lib/static_generator.rb
+ - lib/static_generator/version.rb
+ - spec/crawler_spec.rb
+ - spec/destination_directory/.gitkeep
+ - spec/fakeweb_helper.rb
+ - spec/spec_helper.rb
+ - static_generator.gemspec
+ has_rdoc: true
+ homepage: ""
+ licenses: []
+
+ post_install_message:
+ rdoc_options: []
+
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       hash: 3
+       segments:
+       - 0
+       version: "0"
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       hash: 3
+       segments:
+       - 0
+       version: "0"
+ requirements: []
+
+ rubyforge_project: static_generator
+ rubygems_version: 1.4.1
+ signing_key:
+ specification_version: 3
+ summary: Crawler that generates a file structure that can be served by Apache without any change
+ test_files:
+ - spec/crawler_spec.rb
+ - spec/destination_directory/.gitkeep
+ - spec/fakeweb_helper.rb
+ - spec/spec_helper.rb