static_generator 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +6 -0
- data/README.textile +32 -0
- data/Rakefile +22 -0
- data/lib/static_generator.rb +76 -0
- data/lib/static_generator/version.rb +3 -0
- data/spec/crawler_spec.rb +133 -0
- data/spec/destination_directory/.gitkeep +0 -0
- data/spec/fakeweb_helper.rb +65 -0
- data/spec/spec_helper.rb +8 -0
- data/static_generator.gemspec +27 -0
- metadata +144 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.textile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
h1. Why did I create Static Generator?
|
2
|
+
|
3
|
+
Because I wanted a simple way to grab the HTML from a site generated with my Ruby on Rails CMS.
|
4
|
+
The cool thing with StaticGenerator compared to other solutions (wget) is that when I have a URL like /home/subpage, instead
|
5
|
+
of having this file structure:
|
6
|
+
|
7
|
+
pre. /home
|
8
|
+
/subpage.html
|
9
|
+
|
10
|
+
I have this file structure:
|
11
|
+
|
12
|
+
pre. /home
|
13
|
+
/subpage
|
14
|
+
/index.html
|
15
|
+
|
16
|
+
so all the links in the generated site will stay the same.
|
17
|
+
|
18
|
+
h2. Usage
|
19
|
+
|
20
|
+
pre. require 'static_generator'
|
21
|
+
crawler = StaticGenerator::Crawler.new({
|
22
|
+
:url=>'http://mysite.com/',
|
23
|
+
:destination_path => File.expand_path('some/destination/directory'),
|
24
|
+
:url_prefix => 'http://mysite.com/'
|
25
|
+
})
|
26
|
+
crawler.crawl!
|
27
|
+
|
28
|
+
h2. Author
|
29
|
+
|
30
|
+
Julien Desrosiers
|
31
|
+
|
32
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
Bundler::GemHelper.install_tasks
|
3
|
+
|
4
|
+
require 'rspec/core/rake_task'
|
5
|
+
RSpec::Core::RakeTask.new(:spec)
|
6
|
+
task :default => :spec
|
7
|
+
|
8
|
+
namespace(:spec) do
|
9
|
+
desc "Run all specs on multiple ruby versions (requires rvm)"
|
10
|
+
task(:portability) do
|
11
|
+
%w[1.8.6 1.8.7 1.9.2].each do |version|
|
12
|
+
system <<-BASH
|
13
|
+
bash -c 'source ~/.rvm/scripts/rvm;
|
14
|
+
rvm #{version};
|
15
|
+
echo "--------- version #{version} ----------\n";
|
16
|
+
bundle install;
|
17
|
+
rake spec'
|
18
|
+
BASH
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
require 'anemone'
require 'fileutils'

module StaticGenerator

  # Raised when the :url_prefix option is missing, does not end with '/',
  # or is not a prefix of the crawl URL.
  class WrongURLPrefixError < ArgumentError
  end

  # Raised when the :destination_path option is missing or is not an
  # existing directory.
  class DestinationPathDoesNotExist < ArgumentError
  end

  # Raised when the destination directory exists but is not writable.
  class DestinationPathNotWritableError < ArgumentError
  end

  # Wraps a page object produced by Anemone and knows how to express its
  # URL as a path relative to the site's URL prefix.
  class Page

    attr_reader :crawled_page

    # crawled_page - page object yielded by Anemone during the crawl
    # url_prefix   - site root (e.g. 'http://mysite.com/') to strip from URLs
    def initialize(crawled_page, url_prefix)
      @crawled_page = crawled_page # from anemone
      @url_prefix = url_prefix
    end

    # URL of the page with the site prefix removed, e.g. 'home/subpage'.
    def short_path
      @crawled_page.url.to_s[@url_prefix.length..-1]
    end

    # Raw HTML body of the crawled page.
    def body
      @crawled_page.body
    end
  end

  # Crawls a site and writes every page to
  # <destination_path>/<short_path>/index.html, so "clean" URLs such as
  # /home/subpage keep working when the tree is served statically.
  class Crawler

    attr_reader :pages, :destination_path, :url_prefix

    # opts - hash with :url, :url_prefix and :destination_path keys.
    #
    # Raises WrongURLPrefixError, DestinationPathDoesNotExist or
    # DestinationPathNotWritableError when the options are invalid.
    def initialize(opts)
      @destination_path = opts[:destination_path]
      @url_prefix = opts[:url_prefix]
      @url = opts[:url]
      if @url_prefix.nil?
        raise WrongURLPrefixError, "Expected an `url_prefix` option for the given URL."
      elsif !@url.to_s.start_with?(@url_prefix)
        # Plain string comparison instead of an interpolated regexp: regexp
        # metacharacters in the prefix (the '.' in a domain, for instance)
        # must not produce false matches.
        raise WrongURLPrefixError, "Expected the `url_prefix` option to exist in the given URL."
      elsif @url_prefix[-1, 1] != '/'
        raise WrongURLPrefixError, "Expected the `url_prefix` to end with a '/'."
      end

      # Guard against nil before the File checks so a missing option raises
      # the domain error rather than a TypeError from File.directory?.
      if @destination_path.nil? || !File.directory?(@destination_path)
        raise DestinationPathDoesNotExist, "Expected `destination_path` to be an existing directory."
      elsif !File.writable?(@destination_path)
        raise DestinationPathNotWritableError, "Expected `destination_path` to be writable."
      end
    end

    # Crawls @url with Anemone, collecting every page, then writes the
    # static file tree via #generate_folders.
    def crawl!
      @pages = []
      Anemone.crawl(@url) do |anemone|
        anemone.on_every_page do |page|
          @pages << Page.new(page, @url_prefix)
        end
      end

      generate_folders
    end

    # Creates one directory per crawled page and writes the page body to an
    # index.html file inside it.
    def generate_folders
      @pages.each{|page|
        directory = File.expand_path(@destination_path)+File::SEPARATOR+(page.short_path.split('/').join(File::SEPARATOR))
        FileUtils.mkdir_p( directory )
        File.open("#{directory}/index.html", 'w') {|f| f.write(page.body) }
      }
    end
  end

end
# Specs for StaticGenerator::Crawler. Pages are faked with FakePage (see
# fakeweb_helper.rb) so no network access is needed.
# NOTE: File.exists? (removed in Ruby 3.2) replaced with File.exist?.
require File.dirname(__FILE__) + '/spec_helper'
require 'ap'

module StaticGenerator
  describe Crawler do
    before(:each) do
      @options = {
        :destination_path => File.expand_path('spec/destination_directory'),
        :url_prefix => 'http://www.example.com/'
      }
    end

    after(:each) do
      # Remove everything the crawler wrote under the destination directory.
      FileUtils.rm_rf(Dir.glob(File.expand_path('spec/destination_directory')+File::SEPARATOR+'*'))
    end

    context 'crawling' do

      it 'should find a page that links to another pages' do
        pages = []
        pages << FakePage.new('0', :links => '1')
        pages << FakePage.new('1')
        @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
        @crawler.crawl!

        @crawler.pages.size.should == 2
      end

      it 'should find a page that links to two other pages' do
        pages = []
        pages << FakePage.new('0', :links => ['1','2'])
        pages << FakePage.new('1')
        pages << FakePage.new('2')
        @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
        @crawler.crawl!

        @crawler.pages.size.should == 3
      end

      it 'should have the right prefix for the given url' do
        @crawler = Crawler.new(@options.merge({:url=>FakePage.new('home').url}))
        @crawler.crawl!
        @crawler.pages[0].short_path.should == 'home'

        pages = []
        pages << FakePage.new('0', :links => ['0/1'])
        pages << FakePage.new('0/1')
        @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
        @crawler.crawl!
        @crawler.pages[1].short_path.should == '0/1'

        pages = []
        pages << FakePage.new('root', :links => ['root/subpage/subsubpage'])
        pages << FakePage.new('root/subpage/subsubpage')
        @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
        @crawler.crawl!
        @crawler.pages[1].short_path.should == 'root/subpage/subsubpage'

        # ensure we have a / at the end
        lambda {
          @crawler = Crawler.new(@options.merge({:url=>FakePage.new('root').url,:url_prefix => 'http://www.example.com'}))
        }.should raise_error WrongURLPrefixError
      end

      it 'should follow a relative link' do
        pages = []
        pages << FakePage.new('home', :hrefs => ['/subpage', 'otherpage'])
        pages << FakePage.new('subpage')
        pages << FakePage.new('otherpage')
        @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
        @crawler.crawl!
        @crawler.pages[0].short_path.should == 'home'
        @crawler.pages[1].short_path.should == 'subpage'
        @crawler.pages[2].short_path.should == 'otherpage'
        @crawler.pages[2].crawled_page.url.to_s.should == 'http://www.example.com/otherpage'
      end
    end

    context 'folder' do
      it 'should not be created if destination_path does not exist' do
        lambda {
          @crawler = Crawler.new(@options.merge({
            :url=>FakePage.new('home').url,
            :destination_path=>File.expand_path('spec/destination_directory/')+File::SEPARATOR+'folder_that_doesnt_exist'
          }))
        }.should raise_error DestinationPathDoesNotExist
      end

      it 'should be created' do
        @crawler = Crawler.new(@options.merge({:url=>FakePage.new('home').url}))
        @crawler.crawl!
        File.exist?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'home').should == true
      end

      it 'should have an index.html file in it' do
        @crawler = Crawler.new(@options.merge({:url=>FakePage.new('home').url}))
        @crawler.crawl!
        File.exist?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'home'+File::SEPARATOR+'index.html').should == true
      end

      it 'should throw an error if is not writable' do
        lambda {
          @crawler = Crawler.new(@options.merge({
            :url=>FakePage.new('home').url,
            :destination_path=>File.expand_path('spec/non_writable/')
          }))
        }.should raise_error DestinationPathNotWritableError
      end
    end

    context 'folder with a sub folder in it' do

      it 'should be created' do
        pages = []
        pages << FakePage.new('folder', :links => ['folder/subfolder'])
        pages << FakePage.new('folder/subfolder')
        @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
        @crawler.crawl!
        File.exist?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'folder'+File::SEPARATOR+'subfolder').should == true
      end

      it 'should have an index.html file in it' do
        pages = []
        pages << FakePage.new('folder', :links => ['folder/subfolder'])
        pages << FakePage.new('folder/subfolder')
        @crawler = Crawler.new(@options.merge({:url=>pages[0].url}))
        @crawler.crawl!
        File.exist?(File.expand_path('spec/destination_directory/')+File::SEPARATOR+'folder'+File::SEPARATOR+'subfolder'+File::SEPARATOR+'index.html').should == true
      end
    end

  end
end
File without changes
|
begin
  require 'fakeweb'
rescue LoadError
  warn "You need the 'fakeweb' gem installed to test StaticGenerator"
  exit
end

# No real HTTP during specs: every URL must be registered with FakeWeb.
FakeWeb.allow_net_connect = false

module StaticGenerator
  SPEC_DOMAIN = "http://www.example.com/"

  # Registers a fake HTML page (and, optionally, a redirect) with FakeWeb
  # so the crawler specs can run without network access.
  class FakePage
    attr_accessor :links
    attr_accessor :hrefs
    attr_accessor :body

    # name    - path of the page under SPEC_DOMAIN
    # options - :links (page names linked to, absolute under SPEC_DOMAIN),
    #           :hrefs (raw href attribute values), :redirect (target page),
    #           :content_type (default "text/html"), :body (override HTML)
    def initialize(name = '', options = {})
      @name = name
      @links = [options[:links]].flatten if options.has_key?(:links)
      @hrefs = [options[:hrefs]].flatten if options.has_key?(:hrefs)
      @redirect = options[:redirect] if options.has_key?(:redirect)
      @content_type = options[:content_type] || "text/html"
      @body = options[:body]

      create_body unless @body
      add_to_fakeweb
    end

    # Absolute URL of this fake page.
    def url
      SPEC_DOMAIN + @name
    end

    private

    # Builds a minimal HTML body containing the requested links/hrefs.
    def create_body
      @body = "<html><body>"
      @links.each{|l| @body += "<a href=\"#{SPEC_DOMAIN}#{l}\"></a>"} if @links
      @hrefs.each{|h| @body += "<a href=\"#{h}\"></a>"} if @hrefs
      @body += "</body></html>"
    end

    # Registers this page (and its redirect target, if any) with FakeWeb.
    def add_to_fakeweb
      options = {:body => @body, :content_type => @content_type, :status => [200, "OK"]}

      if @redirect
        options[:status] = [301, "Permanently Moved"]

        # only prepend SPEC_DOMAIN if a relative url (without an http scheme) was specified
        # (anchored match: a relative path merely *containing* "http" must not
        # be mistaken for an absolute URL)
        redirect_url = (@redirect =~ %r{\Ahttps?://}) ? @redirect : SPEC_DOMAIN + @redirect
        options[:location] = redirect_url

        # register the page this one redirects to
        FakeWeb.register_uri(:get, redirect_url, {:body => '',
                                                  :content_type => @content_type,
                                                  :status => [200, "OK"]})
      end

      FakeWeb.register_uri(:get, SPEC_DOMAIN + @name, options)
    end
  end
end

#default root
StaticGenerator::FakePage.new
data/static_generator.gemspec
ADDED
# -*- encoding: utf-8 -*-
# Gem specification for static_generator.
$:.push File.expand_path("../lib", __FILE__)
require "static_generator/version"

Gem::Specification.new do |s|
  s.name        = "static_generator"
  s.version     = StaticGenerator::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Julien Desrosiers"]
  s.email       = ["hello@juliendesrosiers.com"]
  s.homepage    = ""
  s.summary     = %q{Crawler that generates a file structure that can be served by Apache without any change}
  s.description = %q{Crawl a site with 'clean-URLs' and generate a files and folders from it. Example: the URL /page will become /page/index.html instead of /page.html so you can serve it straight from Apache and all the links are still working.}

  s.rubyforge_project = "static_generator"

  # File lists are derived from the git index.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Development-only dependencies.
  s.add_development_dependency "rspec", "~> 2.4.0"
  s.add_development_dependency "fakeweb", "~> 1.3.0"
  s.add_development_dependency "awesome_print", "~> 0.3.2"

  # Runtime dependency: Anemone performs the actual crawling.
  s.add_dependency "anemone", "~> 0.5.0"
end
metadata
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: static_generator
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 25
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Julien Desrosiers
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-02-10 00:00:00 -05:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rspec
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 31
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 4
|
33
|
+
- 0
|
34
|
+
version: 2.4.0
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: fakeweb
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 27
|
46
|
+
segments:
|
47
|
+
- 1
|
48
|
+
- 3
|
49
|
+
- 0
|
50
|
+
version: 1.3.0
|
51
|
+
type: :development
|
52
|
+
version_requirements: *id002
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: awesome_print
|
55
|
+
prerelease: false
|
56
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
hash: 23
|
62
|
+
segments:
|
63
|
+
- 0
|
64
|
+
- 3
|
65
|
+
- 2
|
66
|
+
version: 0.3.2
|
67
|
+
type: :development
|
68
|
+
version_requirements: *id003
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: anemone
|
71
|
+
prerelease: false
|
72
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
hash: 11
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
- 5
|
81
|
+
- 0
|
82
|
+
version: 0.5.0
|
83
|
+
type: :runtime
|
84
|
+
version_requirements: *id004
|
85
|
+
description: "Crawl a site with 'clean-URLs' and generate a files and folders from it. Example: the URL /page will become /page/index.html instead of /page.html so you can serve it straight from Apache and all the links are still working."
|
86
|
+
email:
|
87
|
+
- hello@juliendesrosiers.com
|
88
|
+
executables: []
|
89
|
+
|
90
|
+
extensions: []
|
91
|
+
|
92
|
+
extra_rdoc_files: []
|
93
|
+
|
94
|
+
files:
|
95
|
+
- .gitignore
|
96
|
+
- Gemfile
|
97
|
+
- README.textile
|
98
|
+
- Rakefile
|
99
|
+
- lib/static_generator.rb
|
100
|
+
- lib/static_generator/version.rb
|
101
|
+
- spec/crawler_spec.rb
|
102
|
+
- spec/destination_directory/.gitkeep
|
103
|
+
- spec/fakeweb_helper.rb
|
104
|
+
- spec/spec_helper.rb
|
105
|
+
- static_generator.gemspec
|
106
|
+
has_rdoc: true
|
107
|
+
homepage: ""
|
108
|
+
licenses: []
|
109
|
+
|
110
|
+
post_install_message:
|
111
|
+
rdoc_options: []
|
112
|
+
|
113
|
+
require_paths:
|
114
|
+
- lib
|
115
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
117
|
+
requirements:
|
118
|
+
- - ">="
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
hash: 3
|
121
|
+
segments:
|
122
|
+
- 0
|
123
|
+
version: "0"
|
124
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
|
+
none: false
|
126
|
+
requirements:
|
127
|
+
- - ">="
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
hash: 3
|
130
|
+
segments:
|
131
|
+
- 0
|
132
|
+
version: "0"
|
133
|
+
requirements: []
|
134
|
+
|
135
|
+
rubyforge_project: static_generator
|
136
|
+
rubygems_version: 1.4.1
|
137
|
+
signing_key:
|
138
|
+
specification_version: 3
|
139
|
+
summary: Crawler that generates a file structure that can be served by Apache without any change
|
140
|
+
test_files:
|
141
|
+
- spec/crawler_spec.rb
|
142
|
+
- spec/destination_directory/.gitkeep
|
143
|
+
- spec/fakeweb_helper.rb
|
144
|
+
- spec/spec_helper.rb
|