RubyGems - repub - Versions diffs - 0.3.2 → 0.3.3 - Mend

repub 0.3.2 → 0.3.3

This diff shows the differences between the contents of two publicly available package versions, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (26)

data/History.txt +25 -9
data/README.rdoc +46 -40
data/Rakefile +1 -0
data/bin/repub +1 -1
data/lib/repub.rb +1 -1
data/lib/repub/app.rb +3 -3
data/lib/repub/app/builder.rb +84 -36
data/lib/repub/app/fetcher.rb +13 -11
data/lib/repub/app/options.rb +36 -5
data/lib/repub/app/parser.rb +1 -1
data/lib/repub/app/profile.rb +16 -15
data/lib/repub/epub/container.rb +28 -28
data/lib/repub/epub/content.rb +59 -34
data/lib/repub/epub/toc.rb +139 -139
data/repub.gemspec +3 -3
data/test/data/custom.css +3 -0
data/test/data/invisiblellama.png +0 -0
data/test/data/test.css +5 -0
data/test/data/test.html +60 -0
data/test/epub/test_container.rb +4 -4
data/test/epub/test_content.rb +42 -38
data/test/epub/test_toc.rb +19 -7
data/test/test_builder.rb +145 -1
data/test/test_fetcher.rb +79 -20
data/test/test_parser.rb +45 -32
metadata +6 -2

data/repub.gemspec CHANGED

@@ -2,11 +2,11 @@
 Gem::Specification.new do |s|
   s.name = %q{repub}
-  s.version = "0.3.2"
+  s.version = "0.3.3"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Dmitri Goutnik"]
-  s.date = %q{2009-06-30}
+  s.date = %q{2009-07-05}
   s.default_executable = %q{repub}
   s.description = %q{Repub is a simple HTML to ePub converter.
@@ -16,7 +16,7 @@ ePub documents.}
   s.email = %q{dg@invisiblellama.net}
   s.executables = ["repub"]
   s.extra_rdoc_files = ["History.txt", "README.rdoc", "bin/repub"]
-  s.files = ["History.txt", "README.rdoc", "Rakefile", "TODO", "bin/repub", "lib/repub.rb", "lib/repub/app.rb", "lib/repub/app/builder.rb", "lib/repub/app/fetcher.rb", "lib/repub/app/logger.rb", "lib/repub/app/options.rb", "lib/repub/app/parser.rb", "lib/repub/app/profile.rb", "lib/repub/app/utility.rb", "lib/repub/epub.rb", "lib/repub/epub/container.rb", "lib/repub/epub/content.rb", "lib/repub/epub/toc.rb", "repub.gemspec", "test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
+  s.files = ["History.txt", "README.rdoc", "Rakefile", "TODO", "bin/repub", "lib/repub.rb", "lib/repub/app.rb", "lib/repub/app/builder.rb", "lib/repub/app/fetcher.rb", "lib/repub/app/logger.rb", "lib/repub/app/options.rb", "lib/repub/app/parser.rb", "lib/repub/app/profile.rb", "lib/repub/app/utility.rb", "lib/repub/epub.rb", "lib/repub/epub/container.rb", "lib/repub/epub/content.rb", "lib/repub/epub/toc.rb", "repub.gemspec", "test/data/custom.css", "test/data/invisiblellama.png", "test/data/test.css", "test/data/test.html", "test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
   s.homepage = %q{http://rubyforge.org/projects/repub/}
   s.rdoc_options = ["--main", "README.rdoc"]
   s.require_paths = ["lib"]

data/test/data/custom.css ADDED

@@ -0,0 +1,3 @@
+p {
+    line-height: 150%;
+}

data/test/data/invisiblellama.png ADDED

Binary file

data/test/data/test.css ADDED

@@ -0,0 +1,5 @@
+body {
+	margin: 20px;
+	padding: 10px;
+	border: 1px solid #999;
+}

data/test/data/test.html ADDED

@@ -0,0 +1,60 @@
+<html>
+<head>
+<title>Test Page</title>
+<link rel='stylesheet' type='text/css' href='test.css'/>
+<style type='text/css'>
+h1 {
+  font-size: 4em;
+}
+div.img {
+  text-align: right;
+  background-color: #000;
+  padding: 10px;
+}
+div.img img {
+   border: none;
+}
+</style>
+</head>
+<body>
+  <div class='img'>
+    <img src='invisiblellama.png' alt='invisible llama'/>
+  </div>
+  <h1>Lorem Ipsum</h1>
+  <ul>
+    <li>
+      <a href='#c1'>Chapter 1</a>
+      <ul>
+        <li><a href='#c11'>Chapter 1.1</a></li>
+        <li><a href='#c12'>Chapter 1.2</a></li>
+      </ul>
+    </li>
+    <li>
+      <a href='#c2'>Chapter 2</a>
+      <ul>
+        <li><a href='#c21'>Chapter 2.1</a></li>
+      </ul>
+    </li>
+    <li>
+      <a href='#c3'>Chapter 3</a>
+    </li>
+  </ul>
+  <h1><a id='c1'/>Chapter 1</h1>
+  <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+  <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+  <h3><a id='c11'/>Chapter 1.1</h3>
+  <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+  <h3><a id='c12'/>Chapter 1.2</h3>
+  <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+  <h1><a id='c2'/>Chapter 2</h1>
+  <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+  <h3><a id='c21'/>Chapter 2.1</h3>
+  <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+  <h1><a id='c3'/>Chapter 3</h1>
+  <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+</body>
+</html>

data/test/epub/test_container.rb CHANGED

@@ -7,9 +7,9 @@ class TestContainer < Test::Unit::TestCase
   def test_container_create
     c = Repub::Epub::Container.new
     s = c.to_xml
-    doc = Nokogiri::HTML(s)
-    #puts s
-    assert_not_nil(doc.search('rootfile'))
+    doc = Nokogiri::XML.parse(s)
+    assert_not_nil(doc.at('rootfile'))
+    assert_equal('content.opf', doc.at('rootfile')['full-path'])
+    assert_equal('application/oebps-package+xml', doc.at('rootfile')['media-type'])
   end
 end

data/test/epub/test_content.rb CHANGED

@@ -4,53 +4,57 @@ require 'nokogiri'
 require 'repub/epub'
 class TestContent < Test::Unit::TestCase
+  def test_create
+    x = Repub::Epub::Content.new('some-name')
+    s = x.to_xml
+    doc = Nokogiri::XML.parse(s)
+    #p doc
+    metadata = doc.at('metadata')
+    assert_not_nil(metadata)
+    assert_equal('some-name', metadata.xpath('dc:identifier', 'xmlns:dc' => "http://purl.org/dc/elements/1.1/").inner_text)
+    assert_equal('Untitled', metadata.xpath('dc:title', 'xmlns:dc' => "http://purl.org/dc/elements/1.1/").inner_text)
+    assert_equal('en', metadata.xpath('dc:language', 'xmlns:dc' => "http://purl.org/dc/elements/1.1/").inner_text)
+    assert_equal(Date.today.to_s, metadata.xpath('dc:date', 'xmlns:dc' => "http://purl.org/dc/elements/1.1/").inner_text)
+  end
   def test_manifest_create
     x = Repub::Epub::Content.new('some-name')
     s = x.to_xml
-    #puts s
-    doc = Nokogiri::HTML(s)
+    doc = Nokogiri::XML.parse(s)
+    #p doc
-    # manifest was created
-    assert_not_nil(doc.search('manifest'))
-    # has exactly one item
-    assert_equal(1, doc.search('manifest/item').size)
-    # and item is ncx
-    assert_equal('ncx', doc.search('manifest/item')[0][:id])
-    # spine was created
-    assert_not_nil(doc.search('spine'))
-    # and is empty
-    assert_equal(0, doc.search('spine/item').size)
+    manifest = doc.at('manifest')
+    assert_not_nil(manifest)
+    assert_equal(1, manifest.children.size)
+    assert_equal('ncx', manifest.at('item')['id'])
+    assert_not_nil(doc.at('spine'))
+    assert_equal(0, doc.xpath('spine/item').size)
   end
-  def test_manifest
+  def test_manifest_items
     x = Repub::Epub::Content.new('some-name')
-    x.add_page_template
-    x.add_stylesheet 'style.css'
-    x.add_stylesheet 'more-style.css'
-    x.add_image ' logo.jpg '
-    x.add_image ' image.png'
-    x.add_image 'picture.jpeg     '
-    x.add_document 'intro.html', 'intro'
-    x.add_document 'chapter-1.html'
-    x.add_document 'glossary.html', 'glossary'
+    x.add_item 'style.css'
+    x.add_item 'more-style.css'
+    x.add_item ' logo.jpg '
+    x.add_item ' image.png'
+    x.add_item 'picture.jpeg     '
+    x.add_item 'intro.html', 'intro'
+    x.add_item 'chapter-1.html'
+    x.add_item 'glossary.html', 'glossary'
     s = x.to_xml
-    #puts s
     doc = Nokogiri::HTML(s)
+    #p doc
-    # manifest was created
-    assert_not_nil(doc.search('manifest'))
-    # has 2 stylesheets
-    assert_equal(2, doc.search('manifest/item[@media-type = "text/css"]').size)
-    # and 2 jpegs
-    assert_equal(2, doc.search('manifest/item[@media-type = "image/jpeg"]').size)
-    # and 1 png
-    assert_equal(1, doc.search('manifest/item[@media-type = "image/png"]').size)
-    # spine was created
-    assert_not_nil(doc.search('spine'))
-    # and has 3 html items
-    assert_equal(3, doc.search('spine/itemref').size)
-    # check that order is as inserted and ids are correct
-    assert_equal('intro', doc.search('spine/itemref')[0]['idref'])
-    assert_equal('glossary', doc.search('spine/itemref')[2]['idref'])
+    manifest = doc.at('manifest')
+    assert_not_nil(manifest)
+    assert_equal(2, manifest.xpath('item[@media-type="text/css"]').size)
+    assert_equal(2, manifest.search('item[@media-type="image/jpeg"]').size)
+    assert_equal(1, manifest.search('item[@media-type="image/png"]').size)
+    spine = doc.at('spine')
+    assert_equal(3, spine.search('itemref').size)
+    assert_equal('intro', spine.at('./itemref[position()=1]')['idref'])
+    assert_equal('glossary', spine.at('./itemref[position()=3]')['idref'])
   end
 end

data/test/epub/test_toc.rb CHANGED

@@ -7,12 +7,16 @@ class TestToc < Test::Unit::TestCase
   def test_toc_create
     x = Repub::Epub::Toc.new('some-name')
     s = x.to_xml
-    #puts s
-    doc = Nokogiri::HTML(s)
-    # TODO
+    doc = Nokogiri::XML.parse(s)
+    assert_equal('some-name', doc.at("//xmlns:meta[@name='dtb:uid']")['content'])
+    assert_equal('1', doc.at("//xmlns:meta[@name='dtb:depth']")['content'])
+    assert_equal('0', doc.at("//xmlns:meta[@name='dtb:totalPageCount']")['content'])
+    assert_equal('0', doc.at("//xmlns:meta[@name='dtb:maxPageNumber']")['content'])
+    assert_equal('Untitled', doc.at("//xmlns:docTitle/xmlns:text").inner_text)
+    assert_not_nil(doc.at('//xmlns:navMap'))
   end
-  def test_toc
+  def test_nav_map
     x = Repub::Epub::Toc.new('some-name')
     p0 = x.nav_map.add_nav_point('Intro', 'intro.html')
     p1 = x.nav_map.add_nav_point('Chapter 1', 'chapter-1.html')
@@ -22,8 +26,16 @@ class TestToc < Test::Unit::TestCase
     p11 = p1.add_nav_point('Chapter 1-1', 'chapter-1-1.html')
     p12 = p1.add_nav_point('Chapter 1-2', 'chapter-1-2.html')
     s = x.to_xml
-    #puts s
-    doc = Nokogiri::HTML(s)
-    # TODO
+    doc = Nokogiri::XML.parse(s)
+    assert_equal(4, doc.xpath('//xmlns:navMap/xmlns:navPoint').size)
+    assert_equal('2', doc.at("//xmlns:meta[@name='dtb:depth']")['content'])
+    assert_equal('1', doc.at('//xmlns:navMap/xmlns:navPoint[position()=1]')['playOrder'])
+    assert_equal('2', doc.at('//xmlns:navMap/xmlns:navPoint[position()=2]')['playOrder'])
+    assert_equal('3', doc.at('//xmlns:navMap/xmlns:navPoint[position()=2]/xmlns:navPoint[position()=1]')['playOrder'])
+    assert_equal('5', doc.at('//xmlns:navMap/xmlns:navPoint[position()=3]')['playOrder'])
+    assert_equal('navPoint-2', doc.at('//xmlns:navMap/xmlns:navPoint[position()=2]')['id'])
+    assert_equal('Chapter 1', doc.at('//xmlns:navMap/xmlns:navPoint[position()=2]/xmlns:navLabel/xmlns:text').inner_text)
+    assert_equal('chapter-1.html', doc.at('//xmlns:navMap/xmlns:navPoint[position()=2]/xmlns:content')['src'])
+    assert_equal(7, doc.xpath('//xmlns:navMap//xmlns:navPoint').size)
   end
 end

data/test/test_builder.rb CHANGED

@@ -1,8 +1,152 @@
 require 'test/unit'
 require 'repub'
+require 'repub/app'
 class TestBuilder < Test::Unit::TestCase
+  include Repub::App::Fetcher
+  include Repub::App::Parser
+  include Repub::App::Builder
+  attr_reader :options
+  def setup
+    @url = 'file://' + File.expand_path(File.join(File.dirname(__FILE__), 'data/test.html'))
+    @options = {
+      :url            => @url,
+      # NOTE: cannot test with wget because it doesn't support file:// schema
+      :helper         => 'httrack',
+      :selectors => {
+        :title        => '//h1',
+        :toc          => '//ul',
+        :toc_item     => './li',
+        :toc_section  => './ul'
+      },
+      # do not delete temp folder
+      :browser        => true
+    }
+    Cache.cleanup
+  end
+  def teardown
+    Cache.cleanup
+  end
   def test_builder
-    # TODO
+    builder = build(parse(fetch))
+    doc_path = builder.document_path
+    assert(doc_path.include?('test.html'))
+    doc_text = IO.read(doc_path)
+    # doctype was added
+    assert(doc_text =~ /^<!DOCTYPE/)
+    doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
+    # encoding was set to utf-8
+    doc.xpath('//head/meta[@http-equiv="Content-Type"]').each do |el|
+      assert_equal('text/html; charset=utf-8', el['content'].downcase)
+    end
+  end
+  def test_rx
+    @options[:rx] = ['/Chapter/Retpahc/', '/<h1>/<h2>/', '/<\/h1>/<\/h2>/', '/\s?[Ll]orem\s+//']
+    builder = build(parse(fetch))
+    doc_path = builder.document_path
+    doc_text = IO.read(doc_path)
+    assert(doc_text =~ /Retpahc/ && doc_text !~ /Chapter/)
+    assert(doc_text =~ /<h2>/ && doc_text !~ /<h1>/)
+    assert(doc_text =~ /<\/h2>/ && doc_text !~ /<\/h1>/)
+    assert(doc_text !~ /[Ll]orem/)
+  end
+  def test_custom_css
+    @options[:css] = File.expand_path(File.join(File.dirname(__FILE__), 'data/custom.css'))
+    builder = build(parse(fetch))
+    doc_path = builder.document_path
+    doc_text = IO.read(doc_path)
+    doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
+    links = doc.xpath('//head/link[@rel="stylesheet"]')
+    # we have single link
+    assert_equal(1, links.size)
+    # referencing custom.css
+    assert_equal('custom.css', links[0]['href'])
+    head_last_child = doc.at('//head/*[last()]')
+    # and it is head's last child
+    assert_equal(links[0], head_last_child)
+  end
+  def test_removing_styles
+    @options[:css] = '-'
+    builder = build(parse(fetch))
+    doc_path = builder.document_path
+    doc_text = IO.read(doc_path)
+    doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
+    links = doc.xpath('//head/link[@rel="stylesheet"]')
+    # no stylesheet links
+    assert_equal(0, links.size)
+    styles = doc.xpath('//head/style')
+    # no <style> elements
+    assert_equal(0, styles.size)
+  end
+  def next_nontext_sibling(el)
+    begin
+      el = el.next_sibling
+    end while el.text?
+    el
+  end
+  def previous_nontext_sibling(el)
+    begin
+      el = el.previous_sibling
+    end while el.text?
+    el
+  end
+  def test_inserting_elements_after
+    selector1 = '//ul'
+    fragment1 = Nokogiri::HTML.fragment('<p>blah</p>')
+    selector2 = '//p[last()]'
+    fragment2 = Nokogiri::HTML.fragment('<span>bleh</span><div>boo</div>')
+    @options[:after] = [{ selector1 => fragment1.clone}, {selector2 => fragment2.clone}]
+    builder = build(parse(fetch))
+    doc_path = builder.document_path
+    doc_text = IO.read(doc_path)
+    doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
+    el = next_nontext_sibling(doc.at(selector1))
+    assert_equal(fragment1.children[0].to_s.strip, el.to_s.strip)
+    # first fragment node
+    el = next_nontext_sibling(doc.at(selector2))
+    assert_equal(fragment2.children[0].to_s.strip, el.to_s.strip)
+    # second fragment node
+    el = next_nontext_sibling(el)
+    assert_equal(fragment2.children[1].to_s.strip, el.to_s.strip)
+  end
+  def test_inserting_elements_before
+    selector1 = '//a[@id="c11"]'
+    fragment1 = Nokogiri::HTML.fragment('<h4>blah</h4><div>boo</div>')
+    selector2 = '//p[position()=5]'
+    fragment2 = Nokogiri::HTML.fragment('<div>test</div>')
+    @options[:before] = [{ selector1 => fragment1.clone}, {selector2 => fragment2.clone}]
+    builder = build(parse(fetch))
+    doc_path = builder.document_path
+    doc_text = IO.read(doc_path)
+    doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
+    # first fragment node
+    el = previous_nontext_sibling(doc.at(selector1))
+    assert_equal(fragment1.children[1].to_s.strip, el.to_s.strip)
+    # second fragment node
+    el = previous_nontext_sibling(el)
+    assert_equal(fragment1.children[0].to_s.strip, el.to_s.strip)
+    el = previous_nontext_sibling(doc.at(selector2))
+    assert_equal(fragment2.children[0].to_s.strip, el.to_s.strip)
+  end
+  def test_remove_elements
+    @options[:remove] = ['ul', '//a[@id="c2"]', 'div[@class="img"]']
+    builder = build(parse(fetch))
+    doc_path = builder.document_path
+    doc_text = IO.read(doc_path)
+    doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
+    @options[:remove].each do |selector|
+      assert_equal(0, doc.xpath(selector).size)
+    end
   end
 end

data/test/test_fetcher.rb CHANGED

@@ -7,30 +7,89 @@ class TestFetcher < Test::Unit::TestCase
   include Repub::App::Fetcher
   attr_reader :options
-  def test_fetcher
+  def setup
+    @url = 'file://' + File.expand_path(File.join(File.dirname(__FILE__), 'data/test.html'))
     @options = {
-      :url            => 'http://www.berzinarchives.com/web/x/prn/p.html_1614431902.html',
-      :helper         => 'wget'
+      :url            => @url,
+      # NOTE: cannot test with wget because it doesn't support file:// schema
+      :helper         => 'httrack'
     }
-    assert_nothing_raised do
-      cache = fetch
-      #p cache
-      assert_equal('http://www.berzinarchives.com/web/x/prn/p.html_1614431902.html', cache.url)
-      assert(cache.path.include?('.repub/cache/f963050ead9ee7775a4155e13743d47bc851d5d8'))
-      assert_equal('f963050ead9ee7775a4155e13743d47bc851d5d8', cache.name)
-      # assert(File.exist?(File.join(f.asset_root, f.asset_name)), "Fetch failed.")
-    end
+    Cache.cleanup
+  end
+  def teardown
+    Cache.cleanup
+  end
+  def test_cache_cleanup
+    Cache.cleanup
+    assert_equal(0, Dir.glob(Cache.root + '/**').size)
+    cache = fetch
+    assert_equal(1, Dir.glob(Cache.root + '/**').size)
+    assert_equal(3, Dir.glob(cache.path + '/*').size)
+    Cache.cleanup
+    assert_equal(0, Dir.glob(Cache.root + '/**').size)
+  end
+  def test_fetcher
+    cache = fetch
+    assert_equal(@url, cache.url)
+    assert_equal('8b8d358cf1ada41d4fee885a47530296528dc235', cache.name)
+    assert(cache.path.include?('.repub/cache/8b8d358cf1ada41d4fee885a47530296528dc235'))
+    assert(File.exist?(File.join(cache.path, cache.assets[:documents][0])))
+    assert_equal(1, cache.assets[:documents].size)
+    assert_equal('test.html', cache.assets[:documents][0])
+    assert(File.exist?(File.join(cache.path, cache.assets[:stylesheets][0])))
+    assert_equal(1, cache.assets[:stylesheets].size)
+    assert_equal('test.css', cache.assets[:stylesheets][0])
+    assert(File.exist?(File.join(cache.path, cache.assets[:images][0])))
+    assert_equal(1, cache.assets[:images].size)
+    assert_equal('invisiblellama.png', cache.assets[:images][0])
   end
   def test_fetcher_fail
-    @options = {
-      :url            => 'not-existing',
-      :helper         => 'wget'
-    }
-   assert_raise(Repub::App::FetcherException) do
-     cache = fetch
-     #p cache
-   end
+    # empty url
+    @options[:url] = nil
+    assert_raise(Repub::App::FetcherException) do
+      cache = fetch
+    end
+    @options[:url] = ''
+    assert_raise(Repub::App::FetcherException) do
+      cache = fetch
+    end
+    # empty download helper
+    @options[:url] = 'bleh'
+    @options[:helper] = nil
+    assert_raise(Repub::App::FetcherException) do
+      cache = fetch
+    end
+    @options[:helper] = ''
+    assert_raise(Repub::App::FetcherException) do
+      cache = fetch
+    end
+    # unknown download helper
+    @options[:helper] = 'blah'
+    assert_raise(Repub::App::FetcherException) do
+      cache = fetch
+    end
+    # unresolvable url
+    @options[:helper] = 'wget'
+    assert_raise(Repub::App::FetcherException) do
+      cache = fetch
+    end
+    @options[:helper] = 'httrack'
+    assert_raise(Repub::App::FetcherException) do
+      cache = fetch
+    end
+  end
+  def test_file_encoding_conversion
+    cache = fetch
+    assert_equal('test.html', cache.assets[:documents][0])
+    doc = cache.assets[:documents][0]
+    s_orig = IO.read(File.join(cache.path, doc))
+    encoding = UniversalDetector.chardet(s_orig)['encoding']
+    s_converted = Iconv.conv('utf-8', encoding, s_orig)
+    assert_equal(s_orig, s_converted)
   end
 end