static_generator 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +6 -0
- data/README.textile +32 -0
- data/Rakefile +22 -0
- data/lib/static_generator.rb +76 -0
- data/lib/static_generator/version.rb +3 -0
- data/spec/crawler_spec.rb +133 -0
- data/spec/destination_directory/.gitkeep +0 -0
- data/spec/fakeweb_helper.rb +65 -0
- data/spec/spec_helper.rb +8 -0
- data/static_generator.gemspec +27 -0
- metadata +144 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.textile
ADDED
@@ -0,0 +1,32 @@
h1. Why did I create Static Generator?

Because I wanted a simple way to grab the HTML from a site generated with my Ruby on Rails CMS.
The cool thing about StaticGenerator, compared to other solutions (wget), is that when I have a URL like /home/subpage, instead of having this file structure:

pre. /home
  /subpage.html

I have this file structure:

pre. /home
  /subpage
    /index.html

so all the links in the generated site stay the same.

h2. Usage

pre. require 'static_generator'
crawler = StaticGenerator::Crawler.new({
  :url => 'http://mysite.com/',
  :destination_path => File.expand_path('some/destination/directory'),
  :url_prefix => 'http://mysite.com/'
})
crawler.crawl!

h2. Author

Julien Desrosiers
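To make the README's mapping concrete, here is a small illustrative Ruby sketch (not part of the package; the site URL and destination directory are the README's hypothetical values) of how a crawled URL is reduced to an on-disk path:

# Illustrative sketch of the URL-to-path mapping described above (not from the gem).
url_prefix = 'http://mysite.com/'                # hypothetical prefix from the README
url        = 'http://mysite.com/home/subpage'    # a crawled page URL

short_path = url[url_prefix.length, url.length]  # => "home/subpage"
target     = File.join('some/destination/directory', short_path, 'index.html')
# => "some/destination/directory/home/subpage/index.html"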
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
require 'bundler'
Bundler::GemHelper.install_tasks

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
task :default => :spec

namespace(:spec) do
  desc "Run all specs on multiple ruby versions (requires rvm)"
  task(:portability) do
    %w[1.8.6 1.8.7 1.9.2].each do |version|
      system <<-BASH
        bash -c 'source ~/.rvm/scripts/rvm;
                 rvm #{version};
                 echo "--------- version #{version} ----------\n";
                 bundle install;
                 rake spec'
      BASH
    end
  end
end
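As a usage note: given the default task above, a bare rake run executes the spec suite on the current Ruby, while the rvm matrix would be invoked as rake spec:portability.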
data/lib/static_generator.rb
ADDED
@@ -0,0 +1,76 @@
require 'anemone'
require 'fileutils'

module StaticGenerator

  class WrongURLPrefixError < ArgumentError
  end

  class DestinationPathDoesNotExist < ArgumentError
  end

  class DestinationPathNotWritableError < ArgumentError
  end

  class Page

    attr_reader :crawled_page

    def initialize(crawled_page, url_prefix)
      @crawled_page = crawled_page # page object from anemone
      @url_prefix = url_prefix
    end

    # the crawled URL with the url_prefix stripped off, e.g. "home/subpage"
    def short_path
      @crawled_page.url.to_s[@url_prefix.length, @crawled_page.url.to_s.length]
    end

    def body
      @crawled_page.body
    end
  end

  class Crawler

    attr_reader :pages, :destination_path, :url_prefix

    def initialize(opts)
      @destination_path = opts[:destination_path]
      @url_prefix = opts[:url_prefix]
      @url = opts[:url]
      if @url_prefix.nil?
        raise WrongURLPrefixError, "Expected an `url_prefix` option for the given URL."
      elsif @url !~ /^#{@url_prefix}/
        raise WrongURLPrefixError, "Expected the `url_prefix` option to exist in the given URL."
      elsif @url_prefix[-1, 1] != '/'
        raise WrongURLPrefixError, "Expected the `url_prefix` to end with a '/'."
      end

      if ! File.directory? @destination_path
        raise DestinationPathDoesNotExist
      elsif ! File.writable? @destination_path
        raise DestinationPathNotWritableError
      end
    end

    def crawl!
      @pages = []
      Anemone.crawl(@url) do |anemone|
        anemone.on_every_page do |page|
          @pages << Page.new(page, @url_prefix)
        end
      end

      generate_folders
    end

    # write each page's body to <destination_path>/<short_path>/index.html
    def generate_folders
      @pages.each{|page|
        directory = File.expand_path(@destination_path) + File::SEPARATOR + page.short_path.split('/').join(File::SEPARATOR)
        FileUtils.mkdir_p(directory)
        File.open("#{directory}/index.html", 'w') {|f| f.write(page.body) }
      }
    end
  end

end
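The validations in Crawler#initialize above can be exercised in isolation; here is a minimal hypothetical caller (URL and paths invented for illustration):

# Hypothetical example: url_prefix lacks its trailing '/', so the last
# validation branch in Crawler#initialize should raise before the
# destination-path checks are reached.
require 'static_generator'

begin
  StaticGenerator::Crawler.new(
    :url              => 'http://mysite.com/home',
    :url_prefix       => 'http://mysite.com',
    :destination_path => '/tmp/site'   # would also need to exist and be writable
  )
rescue StaticGenerator::WrongURLPrefixError => e
  warn e.message  # => Expected the `url_prefix` to end with a '/'.
end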
data/lib/static_generator/version.rb
ADDED

data/spec/crawler_spec.rb
ADDED
@@ -0,0 +1,133 @@
require File.dirname(__FILE__) + '/spec_helper'
require 'ap'

module StaticGenerator
  describe Crawler do
    before(:each) do
      @options = {
        :destination_path => File.expand_path('spec/destination_directory'),
        :url_prefix => 'http://www.example.com/'
      }
    end

    after(:each) do
      FileUtils.rm_rf(Dir.glob(File.expand_path('spec/destination_directory') + File::SEPARATOR + '*'))
    end

    context 'crawling' do

      it 'should find a page that links to another page' do
        pages = []
        pages << FakePage.new('0', :links => '1')
        pages << FakePage.new('1')
        @crawler = Crawler.new(@options.merge({:url => pages[0].url}))
        @crawler.crawl!

        @crawler.pages.size.should == 2
      end

      it 'should find a page that links to two other pages' do
        pages = []
        pages << FakePage.new('0', :links => ['1', '2'])
        pages << FakePage.new('1')
        pages << FakePage.new('2')
        @crawler = Crawler.new(@options.merge({:url => pages[0].url}))
        @crawler.crawl!

        @crawler.pages.size.should == 3
      end

      it 'should have the right prefix for the given url' do
        @crawler = Crawler.new(@options.merge({:url => FakePage.new('home').url}))
        @crawler.crawl!
        @crawler.pages[0].short_path.should == 'home'

        pages = []
        pages << FakePage.new('0', :links => ['0/1'])
        pages << FakePage.new('0/1')
        @crawler = Crawler.new(@options.merge({:url => pages[0].url}))
        @crawler.crawl!
        @crawler.pages[1].short_path.should == '0/1'

        pages = []
        pages << FakePage.new('root', :links => ['root/subpage/subsubpage'])
        pages << FakePage.new('root/subpage/subsubpage')
        @crawler = Crawler.new(@options.merge({:url => pages[0].url}))
        @crawler.crawl!
        @crawler.pages[1].short_path.should == 'root/subpage/subsubpage'

        # ensure we have a / at the end
        lambda {
          @crawler = Crawler.new(@options.merge({:url => FakePage.new('root').url, :url_prefix => 'http://www.example.com'}))
        }.should raise_error WrongURLPrefixError
      end

      it 'should follow a relative link' do
        pages = []
        pages << FakePage.new('home', :hrefs => ['/subpage', 'otherpage'])
        pages << FakePage.new('subpage')
        pages << FakePage.new('otherpage')
        @crawler = Crawler.new(@options.merge({:url => pages[0].url}))
        @crawler.crawl!
        @crawler.pages[0].short_path.should == 'home'
        @crawler.pages[1].short_path.should == 'subpage'
        @crawler.pages[2].short_path.should == 'otherpage'
        @crawler.pages[2].crawled_page.url.to_s.should == 'http://www.example.com/otherpage'
      end
    end

    context 'folder' do
      it 'should not be created if destination_path does not exist' do
        lambda {
          @crawler = Crawler.new(@options.merge({
            :url => FakePage.new('home').url,
            :destination_path => File.expand_path('spec/destination_directory/') + File::SEPARATOR + 'folder_that_doesnt_exist'
          }))
        }.should raise_error DestinationPathDoesNotExist
      end

      it 'should be created' do
        @crawler = Crawler.new(@options.merge({:url => FakePage.new('home').url}))
        @crawler.crawl!
        File.exists?(File.expand_path('spec/destination_directory/') + File::SEPARATOR + 'home').should == true
      end

      it 'should have an index.html file in it' do
        @crawler = Crawler.new(@options.merge({:url => FakePage.new('home').url}))
        @crawler.crawl!
        File.exists?(File.expand_path('spec/destination_directory/') + File::SEPARATOR + 'home' + File::SEPARATOR + 'index.html').should == true
      end

      it 'should throw an error if destination_path is not writable' do
        lambda {
          @crawler = Crawler.new(@options.merge({
            :url => FakePage.new('home').url,
            :destination_path => File.expand_path('spec/non_writable/')
          }))
        }.should raise_error DestinationPathNotWritableError
      end
    end

    context 'folder with a sub folder in it' do

      it 'should be created' do
        pages = []
        pages << FakePage.new('folder', :links => ['folder/subfolder'])
        pages << FakePage.new('folder/subfolder')
        @crawler = Crawler.new(@options.merge({:url => pages[0].url}))
        @crawler.crawl!
        File.exists?(File.expand_path('spec/destination_directory/') + File::SEPARATOR + 'folder' + File::SEPARATOR + 'subfolder').should == true
      end

      it 'should have an index.html file in it' do
        pages = []
        pages << FakePage.new('folder', :links => ['folder/subfolder'])
        pages << FakePage.new('folder/subfolder')
        @crawler = Crawler.new(@options.merge({:url => pages[0].url}))
        @crawler.crawl!
        File.exists?(File.expand_path('spec/destination_directory/') + File::SEPARATOR + 'folder' + File::SEPARATOR + 'subfolder' + File::SEPARATOR + 'index.html').should == true
      end
    end

  end
end
data/spec/destination_directory/.gitkeep
ADDED
File without changes

data/spec/fakeweb_helper.rb
ADDED
@@ -0,0 +1,65 @@
begin
  require 'fakeweb'
rescue LoadError
  warn "You need the 'fakeweb' gem installed to test StaticGenerator"
  exit
end

FakeWeb.allow_net_connect = false

module StaticGenerator
  SPEC_DOMAIN = "http://www.example.com/"

  class FakePage
    attr_accessor :links
    attr_accessor :hrefs
    attr_accessor :body

    def initialize(name = '', options = {})
      @name = name
      @links = [options[:links]].flatten if options.has_key?(:links)
      @hrefs = [options[:hrefs]].flatten if options.has_key?(:hrefs)
      @redirect = options[:redirect] if options.has_key?(:redirect)
      @content_type = options[:content_type] || "text/html"
      @body = options[:body]

      create_body unless @body
      add_to_fakeweb
    end

    def url
      SPEC_DOMAIN + @name
    end

    private

    def create_body
      @body = "<html><body>"
      @links.each{|l| @body += "<a href=\"#{SPEC_DOMAIN}#{l}\"></a>"} if @links
      @hrefs.each{|h| @body += "<a href=\"#{h}\"></a>"} if @hrefs
      @body += "</body></html>"
    end

    def add_to_fakeweb
      options = {:body => @body, :content_type => @content_type, :status => [200, "OK"]}

      if @redirect
        options[:status] = [301, "Permanently Moved"]

        # only prepend SPEC_DOMAIN if a relative url (without an http scheme) was specified
        redirect_url = (@redirect =~ /http/) ? @redirect : SPEC_DOMAIN + @redirect
        options[:location] = redirect_url

        # register the page this one redirects to
        FakeWeb.register_uri(:get, redirect_url, {:body => '',
                                                  :content_type => @content_type,
                                                  :status => [200, "OK"]})
      end

      FakeWeb.register_uri(:get, SPEC_DOMAIN + @name, options)
    end
  end
end

# default root
StaticGenerator::FakePage.new
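For reference, a hypothetical standalone use of FakePage (names invented); each instance registers only its own URL with FakeWeb, so a linked page is fetchable only once it gets its own FakePage:

page = StaticGenerator::FakePage.new('docs', :links => ['docs/intro'])
page.url   # => "http://www.example.com/docs"
page.body  # => "<html><body><a href=\"http://www.example.com/docs/intro\"></a></body></html>"
StaticGenerator::FakePage.new('docs/intro')  # register the linked page too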
data/spec/spec_helper.rb
ADDED

data/static_generator.gemspec
ADDED
@@ -0,0 +1,27 @@
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)
require "static_generator/version"

Gem::Specification.new do |s|
  s.name        = "static_generator"
  s.version     = StaticGenerator::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Julien Desrosiers"]
  s.email       = ["hello@juliendesrosiers.com"]
  s.homepage    = ""
  s.summary     = %q{Crawler that generates a file structure that can be served by Apache without any change}
  s.description = %q{Crawl a site with 'clean URLs' and generate files and folders from it. Example: the URL /page will become /page/index.html instead of /page.html, so you can serve it straight from Apache and all the links still work.}

  s.rubyforge_project = "static_generator"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency "rspec", "~> 2.4.0"
  s.add_development_dependency "fakeweb", "~> 1.3.0"
  s.add_development_dependency "awesome_print", "~> 0.3.2"

  s.add_dependency "anemone", "~> 0.5.0"
end
metadata
ADDED
@@ -0,0 +1,144 @@
--- !ruby/object:Gem::Specification
name: static_generator
version: !ruby/object:Gem::Version
  hash: 25
  prerelease:
  segments:
  - 0
  - 0
  - 3
  version: 0.0.3
platform: ruby
authors:
- Julien Desrosiers
autorequire:
bindir: bin
cert_chain: []

date: 2011-02-10 00:00:00 -05:00
default_executable:
dependencies:
- !ruby/object:Gem::Dependency
  name: rspec
  prerelease: false
  requirement: &id001 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        hash: 31
        segments:
        - 2
        - 4
        - 0
        version: 2.4.0
  type: :development
  version_requirements: *id001
- !ruby/object:Gem::Dependency
  name: fakeweb
  prerelease: false
  requirement: &id002 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        hash: 27
        segments:
        - 1
        - 3
        - 0
        version: 1.3.0
  type: :development
  version_requirements: *id002
- !ruby/object:Gem::Dependency
  name: awesome_print
  prerelease: false
  requirement: &id003 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        hash: 23
        segments:
        - 0
        - 3
        - 2
        version: 0.3.2
  type: :development
  version_requirements: *id003
- !ruby/object:Gem::Dependency
  name: anemone
  prerelease: false
  requirement: &id004 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        hash: 11
        segments:
        - 0
        - 5
        - 0
        version: 0.5.0
  type: :runtime
  version_requirements: *id004
description: "Crawl a site with 'clean URLs' and generate files and folders from it. Example: the URL /page will become /page/index.html instead of /page.html, so you can serve it straight from Apache and all the links still work."
email:
- hello@juliendesrosiers.com
executables: []

extensions: []

extra_rdoc_files: []

files:
- .gitignore
- Gemfile
- README.textile
- Rakefile
- lib/static_generator.rb
- lib/static_generator/version.rb
- spec/crawler_spec.rb
- spec/destination_directory/.gitkeep
- spec/fakeweb_helper.rb
- spec/spec_helper.rb
- static_generator.gemspec
has_rdoc: true
homepage: ""
licenses: []

post_install_message:
rdoc_options: []

require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      hash: 3
      segments:
      - 0
      version: "0"
required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      hash: 3
      segments:
      - 0
      version: "0"
requirements: []

rubyforge_project: static_generator
rubygems_version: 1.4.1
signing_key:
specification_version: 3
summary: Crawler that generates a file structure that can be served by Apache without any change
test_files:
- spec/crawler_spec.rb
- spec/destination_directory/.gitkeep
- spec/fakeweb_helper.rb
- spec/spec_helper.rb