RubyGems - invisiblellama-repub - Versions diffs - 0.2.1 → 0.3.1 - Mend

invisiblellama-repub 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

data/History.txt CHANGED Viewed

@@ -1,3 +1,13 @@
-== 0.1 / 2009-06-26
+== 0.2.1 / 2009-06-26
 * Initial release
+== 0.3.0 / 2009-06-28
+* Switched to Nokogiri for HTML parsing
+* Better parsing for hierarchical TOCs
+* Many bug fixes
+== 0.3.1 / 2009-06-28
+* Fixed App.data_path bug

data/README.txt CHANGED Viewed

@@ -1,27 +1,31 @@
 == DESCRIPTION:
-RePub is a simple HTML to ePub converter.
+Simple HTML to ePub converter.
 == FEATURES/PROBLEMS:
-Few samples to get started: (TODO real description)
+A few samples to get started:
+* Git User's Manual
+    repub -x 'title://h1' -x 'toc://div[@class="toc"]/dl' -x 'toc_item:dt' -x 'toc_section:following-sibling::*[1]/dl' \
+        http://www.kernel.org/pub/software/scm/git/docs/user-manual.html
 * Project Gutenberg's THE ADVENTURES OF SHERLOCK HOLMES
-repub -x 'title://div.book//h1' -x 'toc:body//table' -x 'toc_item://tr' \
-    -X 'body/pre,body//hr,body/h1,body/h2' \
-    http://www.gutenberg.org/dirs/etext99/advsh12h.htm
+    repub -x 'title:div[@class="book"]//h1' -x 'toc://table' -x 'toc_item://tr' \
+        -X '//pre' -X '//hr' -X '//body/h1' -X '//body/h2' \
+	    http://www.gutenberg.org/dirs/etext99/advsh12h.htm
 * Project Gutenberg's ALICE'S ADVENTURES IN WONDERLAND
-repub -x 'title:body/h1' -x 'toc:body//table' -x 'toc_item://tr' \
-    -X 'body/pre,body//hr,body/h4' \
-    http://www.gutenberg.org/files/11/11-h/11-h.htm
+    repub -x 'title:body/h1' -x 'toc://table' -x 'toc_item://tr' \
+	    -X '//pre' -X '//hr' -X '//body/h4' \
+	    http://www.gutenberg.org/files/11/11-h/11-h.htm
 * The Gelug-Kagyu Tradition of Mahamudra from Berzin Archives
-repub http://www.berzinarchives.com/web/x/prn/p.html_680632258.html
-* Git User's Manual
-repub -x 'title://h1' -x 'toc://div.toc/dl' -x 'toc_item:/dt' \
-    http://www.kernel.org/pub/software/scm/git/docs/user-manual.html
+    repub http://www.berzinarchives.com/web/x/prn/p.html_680632258.html
 == SYNOPSIS:
@@ -43,7 +47,7 @@ General options:
   -h, --help                       Show this help message.
 Parser options:
-  -x, --selector NAME:VALUE        Set parser XPath or CSS selector NAME to VALUE.
+  -x, --selector NAME:VALUE        Set parser XPath selector NAME to VALUE.
                                    Recognized selectors are: [title toc toc_item toc_section]
   -m, --meta NAME:VALUE            Set publication information metadata NAME to VALUE.
                                    Valid metadata names are: [creator date description
@@ -55,16 +59,21 @@ Parser options:
 Post-processing options:
   -s, --stylesheet PATH            Use custom stylesheet at PATH to add or override existing
                                    CSS references in the source document.
-  -X, --remove SELECTOR            Remove source element using XPath or CSS selector.
+  -X, --remove SELECTOR            Remove source element using XPath selector.
                                    Use -X- to ignore stored profile.
   -R, --rx /PATTERN/REPLACEMENT/   Edit source HTML using regular expressions.
                                    Use -R- to ignore stored profile.
   -B, --browse                     After processing, open resulting HTML in default browser.
-== REQUIREMENTS:
+== DEPENDENCIES:
-    wget or httrack
-    zip (Info-ZIP)
+* Builder (https://rubyforge.org/projects/builder/)
+* Nokogiri (http://nokogiri.rubyforge.org/nokogiri/)
+* rchardet (https://rubyforge.org/projects/rchardet/)
+* launchy (http://copiousfreetime.rubyforge.org/launchy/)
+* wget or httrack
+* zip (Info-ZIP)
 == INSTALL:
@@ -72,9 +81,9 @@ Post-processing options:
 == LICENSE:
-The MIT License
+(The MIT License)
-Copyright (c) 2009 Invisible Llama
+Copyright (c) 2009 Invisible Llama <dg@invisiblellama.net>
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -93,3 +102,5 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
+==

data/Rakefile CHANGED Viewed

@@ -20,11 +20,11 @@ PROJ.email = 'dg@invisiblellama.net'
 PROJ.url = 'http://github.com/invisiblellama/repub/tree/master'
 PROJ.version = Repub::VERSION
 PROJ.rubyforge.name = 'repub'
-PROJ.exclude = %w[tmp/ \.git/ \.DS_Store .*\.tmproj ^pkg/]
+PROJ.exclude = %w[tmp/ \.git \.DS_Store .*\.tmproj .*\.epub ^pkg/]
 PROJ.spec.opts << '--color'
+depend_on 'nokogiri'
 depend_on 'builder'
-depend_on 'hpricot'
 depend_on 'chardet'
 depend_on 'launchy'

data/SAMPLES.txt ADDED Viewed

@@ -0,0 +1,23 @@
+* THE ADVENTURES OF SHERLOCK HOLMES
+repub -x 'title:div[@class="book"]//h1' -x 'toc://table' -x 'toc_item://tr' -X '//pre' -X '//hr' -X '//body/h1' -X '//body/h2' http://www.gutenberg.org/dirs/etext99/advsh12h.htm
+* ALICE'S ADVENTURES IN WONDERLAND
+repub -x 'title:body/h1' -x 'toc://table' -x 'toc_item://tr' -X '//pre' -X '//hr' -X '//body/h4' http://www.gutenberg.org/files/11/11-h/11-h.htm
+* The Gelug-Kagyu Tradition of Mahamudra
+repub http://www.berzinarchives.com/web/x/prn/p.html_680632258.html
+* Брюс Стерлинг. Схизматрица
+repub -x 'title://h2' -x 'toc://table' -x 'toc_item://a' -X 'div' -X 'table' -X '//hr' http://lib.ru/STERLINGB/shizmatrica.txt_with-big-pictures.html
+* Айзек Азимов. Космические течения
+repub -x 'title://h2' -x 'toc://table' -x 'toc_item://a' -X 'div' -X 'table' -X '//hr' http://lib.ru/FOUNDATION/currspac.txt_with-big-pictures.html
+* Git User's Manual
+repub -x 'title://h1' -x 'toc://div[@class="toc"]/dl' -x 'toc_item:dt' -x 'toc_section:following-sibling::*[1]/dl' http://www.kernel.org/pub/software/scm/git/docs/user-manual.html

data/{TODO.txt → TODO} RENAMED Viewed

@@ -1,2 +1,3 @@
 √ add support for rx cleaning/modifying source doc
 √ make -q/-v actually do something
+  more parser tokens: author(s) etc

data/bin/repub CHANGED Viewed

@@ -1,24 +1,8 @@
-#!/usr/bin/env ruby
+#!/usr/bin/env ruby -w
 require File.expand_path(
     File.join(File.dirname(__FILE__), %w[.. lib repub]))
 require 'repub/app'
-# THE ADVENTURES OF SHERLOCK HOLMES
-# repub -x 'title:body/h1' -x 'toc:body//table' 'toc_item://tr' -X 'body/pre,body//hr,body/h1,body/h2' http://www.gutenberg.org/dirs/etext99/advsh12h.htm
-#
-# ALICE'S ADVENTURES IN WONDERLAND
-# repub -x 'title:body/h1' -x 'toc:body//table' -x 'toc_item://tr' -X 'body/pre,body//hr,body/h4' http://www.gutenberg.org/files/11/11-h/11-h.htm
-#
-# The Gelug-Kagyu Tradition of Mahamudra
-# http://www.berzinarchives.com/web/x/prn/p.html_680632258.html
-#
-# Брюс Стерлинг. Схизматрица
-# repub -x 'title://h2' -x 'toc:table' -x 'toc_item://a' -X 'div,table,//hr' http://lib.ru/STERLINGB/shizmatrica.txt_with-big-pictures.html
-#
-# Git User's Manual
-# repub -x 'title://h1' -x 'toc://div.toc/dl' -x 'toc_item:/dt' http://www.kernel.org/pub/software/scm/git/docs/user-manual.html
 Repub::App.instance.run ARGV

data/lib/repub.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module Repub
   # :stopdoc:
-  VERSION = '0.2.1'
+  VERSION = '0.3.1'
   LIBPATH = File.expand_path(File.dirname(__FILE__)) + File::SEPARATOR
   PATH = File.dirname(LIBPATH) + File::SEPARATOR
   # :startdoc:

data/lib/repub/app.rb CHANGED Viewed

@@ -2,9 +2,9 @@ require 'singleton'
 require 'rubygems'
 require 'launchy'
 require 'repub/app/utility'
+require 'repub/app/logger'
 require 'repub/app/options'
 require 'repub/app/profile'
-require 'repub/app/logger'
 require 'repub/app/fetcher'
 require 'repub/app/parser'
 require 'repub/app/builder'
@@ -21,7 +21,9 @@ module Repub
     end
     def self.data_path
-      File.join(File.expand_path('~'), '.repub')
+      data_path = File.join(File.expand_path('~'), '.repub')
+      FileUtils.mkdir_p(data_path) unless File.exist?(data_path)
+      data_path
     end
     def run(args)

data/lib/repub/app/builder.rb CHANGED Viewed

@@ -97,18 +97,17 @@ module Repub
             log.debug "-- Adding missing doctype"
             source = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" + source
           end
-          # Overwrite asset with fixed version
+          # Save processed file
           File.open(asset, 'w') do |f|
             f.write(source)
           end
         end
         def postprocess_doc(asset)
-          # Do Hpricot magic if fixup is ON
-          doc = Hpricot(open(asset), :xhtml_strict => @options[:fixup])
-          # Substitute custom stylesheet
+          doc = Nokogiri::HTML.parse(open(asset), nil, 'UTF-8')
+          # Substitute custom CSS
           if (@options[:css] && !@options[:css].empty?)
-            doc.search('//link[@rel="stylesheet"]') do |link|
+            doc.xpath('//link[@rel="stylesheet"]') do |link|
               link[:href] = File.basename(@options[:css])
               log.debug "-- Replacing CSS refs with #{link[:href]}"
             end
@@ -116,13 +115,22 @@ module Repub
           # Remove elements
           if @options[:remove] && !@options[:remove].empty?
             @options[:remove].each do |selector|
-              log.info "Removing element(s) matching selector \"#{selector}\""
+              log.info "Removing elements matching selector \"#{selector}\""
+              #p doc.search(selector).size
+              #p doc.search(selector)
               doc.search(selector).remove
             end
           end
-          # Overwrite asset with fixed version
+          # Save processed doc
           File.open(asset, 'w') do |f|
-            f << doc.to_html
+            if @options[:fixup]
+              # HACK: Nokogiri seems to ignore the fact that xmlns and other attrs are already present
+              # in the html node and adds them anyway. Just remove them here to avoid duplicates.
+              doc.root.attributes.each {|name, value| doc.root.remove_attribute(name) }
+              doc.write_xhtml_to(f, :encoding => 'UTF-8')
+            else
+              doc.write_html_to(f, :encoding => 'UTF-8')
+            end
           end
         end

data/lib/repub/app/fetcher.rb CHANGED Viewed

@@ -4,10 +4,10 @@ require 'uri'
 require 'iconv'
 require 'rubygems'
-# XXX: suppress warnings from chardet (until they fix them)
-$VERBOSE=false
+old_verbose = $VERBOSE
+$VERBOSE = false
 require 'UniversalDetector'
-$VERBOSE=true
+$VERBOSE = old_verbose
 module Repub
   class App
@@ -101,8 +101,9 @@ module Repub
         end
         def for_url(&block)
-          # if not yet cached, download stuff
-          unless File.exist?(@path)
+          # Download stuff if not yet cached
+          cached = File.exist?(@path)
+          unless cached
             FileUtils.mkdir_p(@path)
             begin
               Dir.chdir(@path) { yield self }
@@ -111,32 +112,33 @@ module Repub
               raise
             end
           else
-            log.debug "-- Already cached in #{@path}"
+            log.info "Using cached assets"
+            log.debug "-- Cache is #{@path}"
           end
-          # do post-download tasks
-          if File.exist?(@path)
-            Dir.chdir(@path) do
-              # enumerate assets
-              @assets = {}
-              AssetTypes.each_pair do |asset_type, file_types|
-                @assets[asset_type] ||= []
-                file_types.each do |file_type|
-                  @assets[asset_type] << Dir.glob("*.#{file_type}")
-                end
-                @assets[asset_type].flatten!
+          # Do post-download tasks
+          Dir.chdir(@path) do
+            # Enumerate assets
+            @assets = {}
+            AssetTypes.each_pair do |asset_type, file_types|
+              @assets[asset_type] ||= []
+              file_types.each do |file_type|
+                @assets[asset_type] << Dir.glob("*.#{file_type}")
               end
-              # detect encoding and convert to utf-8 if needed
+              @assets[asset_type].flatten!
+            end
+            # For freshly downloaded docs, detect encoding and convert to utf-8
+            unless cached
               @assets[:documents].each do |doc|
-                log.debug "-- Detecting encoding for #{doc}"
+                log.info "Detecting encoding for #{doc}"
                 s = IO.read(doc)
                 raise FetcherException, "empty document" unless s
-                encoding = UniversalDetector::chardet(s)['encoding']
+                encoding = UniversalDetector.chardet(s)['encoding']
                 if encoding.downcase != 'utf-8'
-                  log.debug "-- Looks like it's #{encoding}, will convert to UTF-8"
-                  s = Iconv.conv('utf-8', encoding, s)
+                  log.info "Looks like #{encoding}, converting to UTF-8"
+                  s = Iconv.conv('utf-8', encoding, IO.read(doc))
                   File.open(doc, 'w') { |f| f.write(s) }
                 else
-                  log.debug "-- Looks like it's UTF-8, no conversion needed"
+                  log.info "Looks like UTF-8, no conversion needed"
                 end
               end
             end

data/lib/repub/app/options.rb CHANGED Viewed

@@ -3,6 +3,7 @@ require 'optparse'
 module Repub
   class App
     module Options
+      include Logger
       attr_reader :options
@@ -91,10 +92,14 @@ module Repub
           opts.separator "  Parser options:"
           opts.on("-x", "--selector NAME:VALUE", String,
-            "Set parser XPath or CSS selector NAME to VALUE.",
+            "Set parser XPath selector NAME to VALUE.",
             "Recognized selectors are: [title toc toc_item toc_section]"
           ) do |value|
-            name, value = value.split(/:/)
+            begin
+              name, value = value.match(/([^:]+):(.*)/)[1, 2]
+            rescue
+              log.fatal "ERROR: invalid argument: -x '#{value}'. See '#{App.name} --help'."
+            end
             options[:selectors][name.to_sym] = value
           end
@@ -103,7 +108,11 @@ module Repub
             "Valid metadata names are: [creator date description",
             "language publisher relation rights subject title]"
           ) do |value|
-            name, value = value.split(/:/)
+            begin
+              name, value = value.match(/([^:]+):(.*)/)[1, 2]
+            rescue
+              log.fatal "ERROR: invalid argument: -m '#{value}'. See '#{App.name} --help'."
+            end
             options[:metadata][name.to_sym] = value
           end
@@ -125,7 +134,7 @@ module Repub
           ) { |value| options[:css] = File.expand_path(value) }
           opts.on("-X", "--remove SELECTOR", String,
-            "Remove source element using XPath or CSS selector.",
+            "Remove source element using XPath selector.",
             "Use -X- to ignore stored profile."
           ) { |value| value == '-' ? options[:remove] = [] : options[:remove] << value }
@@ -148,15 +157,13 @@ module Repub
         begin
           parser.parse! args
         rescue OptionParser::ParseError => ex
-          STDERR.puts "ERROR: #{ex.to_s}. See '#{App.name} --help'."
-          exit 1
+          log.fatal "ERROR: #{ex.to_s}. See '#{App.name} --help'."
         end
         options[:url] = args.last
         if options[:url].nil? || options[:url].empty?
           help parser
-          STDERR.puts "ERROR: Please specify an URL."
-          exit 1
+          log.fatal "ERROR: Please specify an URL."
         end
       end

data/lib/repub/app/parser.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 require 'rubygems'
-require 'hpricot'
+require 'nokogiri'
 module Repub
   class App
@@ -11,13 +11,13 @@ module Repub
         Parser.new(options).parse(cache)
       end
-      # Default hpricot selectors
+      # Default selectors
       #
       Selectors = {
         :title        => '//h1',
-        :toc          => '//div.toc/ul',
-        :toc_item     => '/li',
-        :toc_section  => '/ul'
+        :toc          => '//ul',
+        :toc_item     => './li',
+        :toc_section  => './ul'
       }
       class Parser
@@ -43,7 +43,7 @@ module Repub
           @cache = cache
           @asset = @cache.assets[:documents][0]
           log.debug "-- Parsing #{@asset}"
-          @doc = Hpricot(open(File.join(@cache.path, @asset)), @fixup)
+          @doc = Nokogiri::HTML.parse(open(File.join(@cache.path, @asset)), nil, 'UTF-8')
           @uid = @cache.name
           parse_title
@@ -64,13 +64,13 @@ module Repub
             if el.children.empty?
               title_text = el.inner_text
             else
-              title_text =  el.children.map{|c| c.inner_text }.join(' ')
+              title_text = el.children.map{|c| c.inner_text }.join(' ')
             end
             @title = title_text.gsub(/[\r\n]/, '').gsub(/\s+/, ' ').strip
             log.info "Found title \"#{@title}\""
           else
             @title = UNTITLED
-            log.warn "** Could not parse document title, using '#{@title}'"
+            log.warn "** Could not find document title, using '#{@title}'"
           end
         end
@@ -80,6 +80,8 @@ module Repub
           @title_html = el ? el.inner_html.gsub(/[\r\n]/, '') : UNTITLED
         end
+        # Helper container for TOC items
+        #
         class TocItem < Struct.new(
             :title,
             :uri,
@@ -102,31 +104,42 @@ module Repub
         def parse_toc
           log.debug "-- Looking for TOC with #{@selectors[:toc]}"
-          el = @doc.at(@selectors[:toc])
+          el = @doc.xpath(@selectors[:toc]).first
           if el
             @toc = parse_toc_section(el)
             log.info "Found TOC with #{@toc.size} top-level items"
           else
             @toc = []
-            log.warn "** Could not parse document table of contents"
+            log.warn "** Could not find document table of contents"
           end
         end
         def parse_toc_section(section)
           toc = []
           log.debug "-- Looking for TOC items with #{@selectors[:toc_item]}"
-          section.search(@selectors[:toc_item]).each do |item|
+          section.xpath(@selectors[:toc_item]).each do |item|
+            # Get item's anchor and href
             a = item.name == 'a' ? item : item.at('a')
-            next if a.nil?
-            href = a['href']
-            next if href.nil?
-            title = item.inner_text.gsub(/\s+/, ' ').strip
-            subitems = nil
+            next if !a
+            href = a[:href]
+            next if !href
+            # Is this a leaf item or node ?
+            subsection = item.xpath(@selectors[:toc_section]).first
+            if subsection
+              # Item has subsection, use anchor text for title
+              title = a.inner_text
+            else
+              # Leaf item, glue inner_text from all children
+              title = item.children.map{|c| c.inner_text }.join(' ')
+            end
+            title = title.gsub(/[\r\n]/, '').gsub(/\s+/, ' ').strip
             log.debug "-- Found item: #{title}"
-            item.search(@selectors[:toc_section]).each do |subsection|
-              log.debug "-- Found section with #{@selectors[:toc_section]} >>>"
+            # Parse sub-section
+            if subsection
+              log.debug "-- Found section with #{@selectors[:toc_section]}"
+              log.debug "-- >"
               subitems = parse_toc_section(subsection)
-              log.debug '-- <<<'
+              log.debug '-- .'
             end
             toc << TocItem.new(title, href, subitems, @asset)
           end

data/repub.gemspec ADDED Viewed

@@ -0,0 +1,48 @@
+# -*- encoding: utf-8 -*-
+Gem::Specification.new do |s|
+  s.name = %q{repub}
+  s.version = "0.3.1"
+  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+  s.authors = ["Dmitri Goutnik"]
+  s.date = %q{2009-06-28}
+  s.default_executable = %q{repub}
+  s.description = %q{Simple HTML to ePub converter.}
+  s.email = %q{dg@invisiblellama.net}
+  s.executables = ["repub"]
+  s.extra_rdoc_files = ["History.txt", "README.txt", "SAMPLES.txt", "bin/repub"]
+  s.files = ["History.txt", "README.txt", "Rakefile", "SAMPLES.txt", "TODO", "bin/repub", "lib/repub.rb", "lib/repub/app.rb", "lib/repub/app/builder.rb", "lib/repub/app/fetcher.rb", "lib/repub/app/logger.rb", "lib/repub/app/options.rb", "lib/repub/app/parser.rb", "lib/repub/app/profile.rb", "lib/repub/app/utility.rb", "lib/repub/epub.rb", "lib/repub/epub/container.rb", "lib/repub/epub/content.rb", "lib/repub/epub/toc.rb", "repub.gemspec", "test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
+  s.homepage = %q{http://github.com/invisiblellama/repub/tree/master}
+  s.rdoc_options = ["--main", "README.txt"]
+  s.require_paths = ["lib"]
+  s.rubyforge_project = %q{repub}
+  s.rubygems_version = %q{1.3.4}
+  s.summary = %q{Simple HTML to ePub converter}
+  s.test_files = ["test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
+  if s.respond_to? :specification_version then
+    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
+    s.specification_version = 3
+    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
+      s.add_runtime_dependency(%q<nokogiri>, [">= 1.3.2"])
+      s.add_runtime_dependency(%q<builder>, [">= 2.1.2"])
+      s.add_runtime_dependency(%q<chardet>, [">= 0.9.0"])
+      s.add_runtime_dependency(%q<launchy>, [">= 0.3.3"])
+      s.add_development_dependency(%q<bones>, [">= 2.5.1"])
+    else
+      s.add_dependency(%q<nokogiri>, [">= 1.3.2"])
+      s.add_dependency(%q<builder>, [">= 2.1.2"])
+      s.add_dependency(%q<chardet>, [">= 0.9.0"])
+      s.add_dependency(%q<launchy>, [">= 0.3.3"])
+      s.add_dependency(%q<bones>, [">= 2.5.1"])
+    end
+  else
+    s.add_dependency(%q<nokogiri>, [">= 1.3.2"])
+    s.add_dependency(%q<builder>, [">= 2.1.2"])
+    s.add_dependency(%q<chardet>, [">= 0.9.0"])
+    s.add_dependency(%q<launchy>, [">= 0.3.3"])
+    s.add_dependency(%q<bones>, [">= 2.5.1"])
+  end
+end

data/test/epub/test_container.rb CHANGED Viewed

@@ -1,13 +1,13 @@
 require 'test/unit'
 require 'rubygems'
-require 'hpricot'
+require 'nokogiri'
 require 'repub/epub'
 class TestContainer < Test::Unit::TestCase
   def test_container_create
     c = Repub::Epub::Container.new
     s = c.to_xml
-    doc = Hpricot(s)
+    doc = Nokogiri::HTML(s)
     #puts s
     assert_not_nil(doc.search('rootfile'))

data/test/epub/test_content.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require 'test/unit'
 require 'rubygems'
-require 'hpricot'
+require 'nokogiri'
 require 'repub/epub'
 class TestContent < Test::Unit::TestCase
@@ -8,7 +8,7 @@ class TestContent < Test::Unit::TestCase
     x = Repub::Epub::Content.new('some-name')
     s = x.to_xml
     #puts s
-    doc = Hpricot(s)
+    doc = Nokogiri::HTML(s)
     # manifest was created
     assert_not_nil(doc.search('manifest'))
@@ -35,7 +35,7 @@ class TestContent < Test::Unit::TestCase
     x.add_document 'glossary.html', 'glossary'
     s = x.to_xml
     #puts s
-    doc = Hpricot(s)
+    doc = Nokogiri::HTML(s)
     # manifest was created
     assert_not_nil(doc.search('manifest'))

data/test/epub/test_toc.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require 'test/unit'
 require 'rubygems'
-require 'hpricot'
+require 'nokogiri'
 require 'repub/epub'
 class TestToc < Test::Unit::TestCase
@@ -8,7 +8,7 @@ class TestToc < Test::Unit::TestCase
     x = Repub::Epub::Toc.new('some-name')
     s = x.to_xml
     #puts s
-    doc = Hpricot(s)
+    doc = Nokogiri::HTML(s)
     # TODO
   end
@@ -23,7 +23,7 @@ class TestToc < Test::Unit::TestCase
     p12 = p1.add_nav_point('Chapter 1-2', 'chapter-1-2.html')
     s = x.to_xml
     #puts s
-    doc = Hpricot(s)
+    doc = Nokogiri::HTML(s)
     # TODO
   end
 end

data/test/test_builder.rb CHANGED Viewed

@@ -3,6 +3,6 @@ require 'repub'
 class TestBuilder < Test::Unit::TestCase
   def test_builder
-    flunk("todo")
+    # TODO
   end
 end

data/test/test_fetcher.rb CHANGED Viewed

@@ -1,36 +1,36 @@
-require 'test/unit'
-require 'repub'
-require 'repub/app'
-class TestFetcher < Test::Unit::TestCase
-  include Repub::App::Fetcher
-  attr_reader :options
-  def test_fetcher
-    @options = {
-      :url            => 'http://www.berzinarchives.com/web/x/prn/p.html_1614431902.html',
-      :helper         => 'wget'
-    }
-    assert_nothing_raised do
-      cache = fetch
-      #p cache
-      assert_equal('http://www.berzinarchives.com/web/x/prn/p.html_1614431902.html', cache.url)
-      assert(cache.path.include?('.repub/cache/f963050ead9ee7775a4155e13743d47bc851d5d8'))
-      assert_equal('f963050ead9ee7775a4155e13743d47bc851d5d8', cache.name)
-      # assert(File.exist?(File.join(f.asset_root, f.asset_name)), "Fetch failed.")
-    end
-  end
-  def test_fetcher_fail
-    @options = {
-      :url            => 'not-existing',
-      :helper         => 'wget'
-    }
-   assert_raise(Repub::App::FetcherException) do
-     cache = fetch
-     #p cache
-   end
-  end
-end
+require 'test/unit'
+require 'repub'
+require 'repub/app'
+class TestFetcher < Test::Unit::TestCase
+  include Repub::App::Fetcher
+  attr_reader :options
+  def test_fetcher
+    @options = {
+      :url            => 'http://www.berzinarchives.com/web/x/prn/p.html_1614431902.html',
+      :helper         => 'wget'
+    }
+    assert_nothing_raised do
+      cache = fetch
+      #p cache
+      assert_equal('http://www.berzinarchives.com/web/x/prn/p.html_1614431902.html', cache.url)
+      assert(cache.path.include?('.repub/cache/f963050ead9ee7775a4155e13743d47bc851d5d8'))
+      assert_equal('f963050ead9ee7775a4155e13743d47bc851d5d8', cache.name)
+      # assert(File.exist?(File.join(f.asset_root, f.asset_name)), "Fetch failed.")
+    end
+  end
+  def test_fetcher_fail
+    @options = {
+      :url            => 'not-existing',
+      :helper         => 'wget'
+    }
+   assert_raise(Repub::App::FetcherException) do
+     cache = fetch
+     #p cache
+   end
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: invisiblellama-repub
 version: !ruby/object:Gem::Version
-  version: 0.2.1
+  version: 0.3.1
 platform: ruby
 authors:
 - Dmitri Goutnik
@@ -9,28 +9,28 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-06-26 00:00:00 -07:00
+date: 2009-06-28 00:00:00 -07:00
 default_executable: repub
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: builder
+  name: nokogiri
   type: :runtime
   version_requirement:
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.1.2
+        version: 1.3.2
     version:
 - !ruby/object:Gem::Dependency
-  name: hpricot
+  name: builder
   type: :runtime
   version_requirement:
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.8.1
+        version: 2.1.2
     version:
 - !ruby/object:Gem::Dependency
   name: chardet
@@ -62,7 +62,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: 2.5.1
     version:
-description: RePub is a simple HTML to ePub converter.
+description: Simple HTML to ePub converter.
 email: dg@invisiblellama.net
 executables:
 - repub
@@ -71,15 +71,14 @@ extensions: []
 extra_rdoc_files:
 - History.txt
 - README.txt
-- TODO.txt
+- SAMPLES.txt
 - bin/repub
-- lib/repub/mobi/.githidden
 files:
-- .gitignore
 - History.txt
 - README.txt
 - Rakefile
-- TODO.txt
+- SAMPLES.txt
+- TODO
 - bin/repub
 - lib/repub.rb
 - lib/repub/app.rb
@@ -94,7 +93,7 @@ files:
 - lib/repub/epub/container.rb
 - lib/repub/epub/content.rb
 - lib/repub/epub/toc.rb
-- lib/repub/mobi/.githidden
+- repub.gemspec
 - test/epub/test_container.rb
 - test/epub/test_content.rb
 - test/epub/test_toc.rb
@@ -128,7 +127,7 @@ rubyforge_project: repub
 rubygems_version: 1.2.0
 signing_key:
 specification_version: 3
-summary: RePub is a simple HTML to ePub converter
+summary: Simple HTML to ePub converter
 test_files:
 - test/epub/test_container.rb
 - test/epub/test_content.rb

data/.gitignore DELETED Viewed

@@ -1,4 +0,0 @@
-pkg
-tmp
-.eprj
-*.epub

data/lib/repub/mobi/.githidden DELETED Viewed

File without changes