invisiblellama-repub 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,13 @@
1
- == 0.1 / 2009-06-26
1
+ == 0.2.1 / 2009-06-26
2
2
 
3
3
  * Initial release
4
+
5
+ == 0.3.0 / 2009-06-28
6
+
7
+ * Switched to Nokogiri for HTML parsing
8
+ * Better parsing for hierarchical TOCs
9
+ * Many bug fixes
10
+
11
+ == 0.3.1 / 2009-06-28
12
+
13
+ * Fixed App.data_path bug
data/README.txt CHANGED
@@ -1,27 +1,31 @@
1
1
  == DESCRIPTION:
2
2
 
3
- RePub is a simple HTML to ePub converter.
3
+ Simple HTML to ePub converter.
4
4
 
5
5
  == FEATURES/PROBLEMS:
6
6
 
7
- Few samples to get started: (TODO real description)
7
+ A few samples to get started:
8
+
9
+ * Git User's Manual
10
+
11
+ repub -x 'title://h1' -x 'toc://div[@class="toc"]/dl' -x 'toc_item:dt' -x 'toc_section:following-sibling::*[1]/dl' \
12
+ http://www.kernel.org/pub/software/scm/git/docs/user-manual.html
8
13
 
9
14
  * Project Gutenberg's THE ADVENTURES OF SHERLOCK HOLMES
10
- repub -x 'title://div.book//h1' -x 'toc:body//table' -x 'toc_item://tr' \
11
- -X 'body/pre,body//hr,body/h1,body/h2' \
12
- http://www.gutenberg.org/dirs/etext99/advsh12h.htm
15
+
16
+ repub -x 'title:div[@class="book"]//h1' -x 'toc://table' -x 'toc_item://tr' \
17
+ -X '//pre' -X '//hr' -X '//body/h1' -X '//body/h2' \
18
+ http://www.gutenberg.org/dirs/etext99/advsh12h.htm
13
19
 
14
20
  * Project Gutenberg's ALICE'S ADVENTURES IN WONDERLAND
15
- repub -x 'title:body/h1' -x 'toc:body//table' -x 'toc_item://tr' \
16
- -X 'body/pre,body//hr,body/h4' \
17
- http://www.gutenberg.org/files/11/11-h/11-h.htm
21
+
22
+ repub -x 'title:body/h1' -x 'toc://table' -x 'toc_item://tr' \
23
+ -X '//pre' -X '//hr' -X '//body/h4' \
24
+ http://www.gutenberg.org/files/11/11-h/11-h.htm
18
25
 
19
26
  * The Gelug-Kagyu Tradition of Mahamudra from Berzin Archives
20
- repub http://www.berzinarchives.com/web/x/prn/p.html_680632258.html
21
27
 
22
- * Git User's Manual
23
- repub -x 'title://h1' -x 'toc://div.toc/dl' -x 'toc_item:/dt' \
24
- http://www.kernel.org/pub/software/scm/git/docs/user-manual.html
28
+ repub http://www.berzinarchives.com/web/x/prn/p.html_680632258.html
25
29
 
26
30
  == SYNOPSIS:
27
31
 
@@ -43,7 +47,7 @@ General options:
43
47
  -h, --help Show this help message.
44
48
 
45
49
  Parser options:
46
- -x, --selector NAME:VALUE Set parser XPath or CSS selector NAME to VALUE.
50
+ -x, --selector NAME:VALUE Set parser XPath selector NAME to VALUE.
47
51
  Recognized selectors are: [title toc toc_item toc_section]
48
52
  -m, --meta NAME:VALUE Set publication information metadata NAME to VALUE.
49
53
  Valid metadata names are: [creator date description
@@ -55,16 +59,21 @@ Parser options:
55
59
  Post-processing options:
56
60
  -s, --stylesheet PATH Use custom stylesheet at PATH to add or override existing
57
61
  CSS references in the source document.
58
- -X, --remove SELECTOR Remove source element using XPath or CSS selector.
62
+ -X, --remove SELECTOR Remove source element using XPath selector.
59
63
  Use -X- to ignore stored profile.
60
64
  -R, --rx /PATTERN/REPLACEMENT/ Edit source HTML using regular expressions.
61
65
  Use -R- to ignore stored profile.
62
66
  -B, --browse After processing, open resulting HTML in default browser.
63
67
 
64
- == REQUIREMENTS:
68
+ == DEPENDENCIES:
65
69
 
66
- wget or httrack
67
- zip (Info-ZIP)
70
+ * Builder (https://rubyforge.org/projects/builder/)
71
+ * Nokogiri (http://nokogiri.rubyforge.org/nokogiri/)
72
+ * rchardet (https://rubyforge.org/projects/rchardet/)
73
+ * launchy (http://copiousfreetime.rubyforge.org/launchy/)
74
+
75
+ * wget or httrack
76
+ * zip (Info-ZIP)
68
77
 
69
78
  == INSTALL:
70
79
 
@@ -72,9 +81,9 @@ Post-processing options:
72
81
 
73
82
  == LICENSE:
74
83
 
75
- The MIT License
84
+ (The MIT License)
76
85
 
77
- Copyright (c) 2009 Invisible Llama
86
+ Copyright (c) 2009 Invisible Llama <dg@invisiblellama.net>
78
87
 
79
88
  Permission is hereby granted, free of charge, to any person obtaining a copy
80
89
  of this software and associated documentation files (the "Software"), to deal
@@ -93,3 +102,5 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
93
102
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
94
103
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
95
104
  THE SOFTWARE.
105
+
106
+ ==
data/Rakefile CHANGED
@@ -20,11 +20,11 @@ PROJ.email = 'dg@invisiblellama.net'
20
20
  PROJ.url = 'http://github.com/invisiblellama/repub/tree/master'
21
21
  PROJ.version = Repub::VERSION
22
22
  PROJ.rubyforge.name = 'repub'
23
- PROJ.exclude = %w[tmp/ \.git/ \.DS_Store .*\.tmproj ^pkg/]
23
+ PROJ.exclude = %w[tmp/ \.git \.DS_Store .*\.tmproj .*\.epub ^pkg/]
24
24
 
25
25
  PROJ.spec.opts << '--color'
26
26
 
27
+ depend_on 'nokogiri'
27
28
  depend_on 'builder'
28
- depend_on 'hpricot'
29
29
  depend_on 'chardet'
30
30
  depend_on 'launchy'
data/SAMPLES.txt ADDED
@@ -0,0 +1,23 @@
1
+ * THE ADVENTURES OF SHERLOCK HOLMES
2
+
3
+ repub -x 'title:div[@class="book"]//h1' -x 'toc://table' -x 'toc_item://tr' -X '//pre' -X '//hr' -X '//body/h1' -X '//body/h2' http://www.gutenberg.org/dirs/etext99/advsh12h.htm
4
+
5
+ * ALICE'S ADVENTURES IN WONDERLAND
6
+
7
+ repub -x 'title:body/h1' -x 'toc://table' -x 'toc_item://tr' -X '//pre' -X '//hr' -X '//body/h4' http://www.gutenberg.org/files/11/11-h/11-h.htm
8
+
9
+ * The Gelug-Kagyu Tradition of Mahamudra
10
+
11
+ repub http://www.berzinarchives.com/web/x/prn/p.html_680632258.html
12
+
13
+ * Брюс Стерлинг. Схизматрица
14
+
15
+ repub -x 'title://h2' -x 'toc://table' -x 'toc_item://a' -X 'div' -X 'table' -X '//hr' http://lib.ru/STERLINGB/shizmatrica.txt_with-big-pictures.html
16
+
17
+ * Айзек Азимов. Космические течения
18
+
19
+ repub -x 'title://h2' -x 'toc://table' -x 'toc_item://a' -X 'div' -X 'table' -X '//hr' http://lib.ru/FOUNDATION/currspac.txt_with-big-pictures.html
20
+
21
+ * Git User's Manual
22
+
23
+ repub -x 'title://h1' -x 'toc://div[@class="toc"]/dl' -x 'toc_item:dt' -x 'toc_section:following-sibling::*[1]/dl' http://www.kernel.org/pub/software/scm/git/docs/user-manual.html
data/{TODO.txt → TODO} RENAMED
@@ -1,2 +1,3 @@
1
1
  √ add support for rx cleaning/modifying source doc
2
2
  √ make -q/-v actually do something
3
+ more parser tokens: author(s) etc
data/bin/repub CHANGED
@@ -1,24 +1,8 @@
1
- #!/usr/bin/env ruby
1
+ #!/usr/bin/env ruby -w
2
2
 
3
3
  require File.expand_path(
4
4
  File.join(File.dirname(__FILE__), %w[.. lib repub]))
5
5
 
6
6
  require 'repub/app'
7
7
 
8
- # THE ADVENTURES OF SHERLOCK HOLMES
9
- # repub -x 'title:body/h1' -x 'toc:body//table' 'toc_item://tr' -X 'body/pre,body//hr,body/h1,body/h2' http://www.gutenberg.org/dirs/etext99/advsh12h.htm
10
- #
11
- # ALICE'S ADVENTURES IN WONDERLAND
12
- # repub -x 'title:body/h1' -x 'toc:body//table' -x 'toc_item://tr' -X 'body/pre,body//hr,body/h4' http://www.gutenberg.org/files/11/11-h/11-h.htm
13
- #
14
- # The Gelug-Kagyu Tradition of Mahamudra
15
- # http://www.berzinarchives.com/web/x/prn/p.html_680632258.html
16
- #
17
- # Брюс Стерлинг. Схизматрица
18
- # repub -x 'title://h2' -x 'toc:table' -x 'toc_item://a' -X 'div,table,//hr' http://lib.ru/STERLINGB/shizmatrica.txt_with-big-pictures.html
19
- #
20
- # Git User's Manual
21
- # repub -x 'title://h1' -x 'toc://div.toc/dl' -x 'toc_item:/dt' http://www.kernel.org/pub/software/scm/git/docs/user-manual.html
22
-
23
-
24
8
  Repub::App.instance.run ARGV
data/lib/repub.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module Repub
2
2
 
3
3
  # :stopdoc:
4
- VERSION = '0.2.1'
4
+ VERSION = '0.3.1'
5
5
  LIBPATH = File.expand_path(File.dirname(__FILE__)) + File::SEPARATOR
6
6
  PATH = File.dirname(LIBPATH) + File::SEPARATOR
7
7
  # :startdoc:
data/lib/repub/app.rb CHANGED
@@ -2,9 +2,9 @@ require 'singleton'
2
2
  require 'rubygems'
3
3
  require 'launchy'
4
4
  require 'repub/app/utility'
5
+ require 'repub/app/logger'
5
6
  require 'repub/app/options'
6
7
  require 'repub/app/profile'
7
- require 'repub/app/logger'
8
8
  require 'repub/app/fetcher'
9
9
  require 'repub/app/parser'
10
10
  require 'repub/app/builder'
@@ -21,7 +21,9 @@ module Repub
21
21
  end
22
22
 
23
23
  def self.data_path
24
- File.join(File.expand_path('~'), '.repub')
24
+ data_path = File.join(File.expand_path('~'), '.repub')
25
+ FileUtils.mkdir_p(data_path) unless File.exist?(data_path)
26
+ data_path
25
27
  end
26
28
 
27
29
  def run(args)
@@ -97,18 +97,17 @@ module Repub
97
97
  log.debug "-- Adding missing doctype"
98
98
  source = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" + source
99
99
  end
100
- # Overwrite asset with fixed version
100
+ # Save processed file
101
101
  File.open(asset, 'w') do |f|
102
102
  f.write(source)
103
103
  end
104
104
  end
105
105
 
106
106
  def postprocess_doc(asset)
107
- # Do Hpricot magic if fixup is ON
108
- doc = Hpricot(open(asset), :xhtml_strict => @options[:fixup])
109
- # Substitute custom stylesheet
107
+ doc = Nokogiri::HTML.parse(open(asset), nil, 'UTF-8')
108
+ # Substitute custom CSS
110
109
  if (@options[:css] && !@options[:css].empty?)
111
- doc.search('//link[@rel="stylesheet"]') do |link|
110
+ doc.xpath('//link[@rel="stylesheet"]') do |link|
112
111
  link[:href] = File.basename(@options[:css])
113
112
  log.debug "-- Replacing CSS refs with #{link[:href]}"
114
113
  end
@@ -116,13 +115,22 @@ module Repub
116
115
  # Remove elements
117
116
  if @options[:remove] && !@options[:remove].empty?
118
117
  @options[:remove].each do |selector|
119
- log.info "Removing element(s) matching selector \"#{selector}\""
118
+ log.info "Removing elements matching selector \"#{selector}\""
119
+ #p doc.search(selector).size
120
+ #p doc.search(selector)
120
121
  doc.search(selector).remove
121
122
  end
122
123
  end
123
- # Overwrite asset with fixed version
124
+ # Save processed doc
124
125
  File.open(asset, 'w') do |f|
125
- f << doc.to_html
126
+ if @options[:fixup]
127
+ # HACK: Nokogiri seems to ignore the fact that xmlns and other attrs are already present
128
+ # in html node and adds them anyway. Just remove them here to avoid duplicates.
129
+ doc.root.attributes.each {|name, value| doc.root.remove_attribute(name) }
130
+ doc.write_xhtml_to(f, :encoding => 'UTF-8')
131
+ else
132
+ doc.write_html_to(f, :encoding => 'UTF-8')
133
+ end
126
134
  end
127
135
  end
128
136
 
@@ -4,10 +4,10 @@ require 'uri'
4
4
  require 'iconv'
5
5
  require 'rubygems'
6
6
 
7
- # XXX: suppress warnings from chardet (until they fix them)
8
- $VERBOSE=false
7
+ old_verbose = $VERBOSE
8
+ $VERBOSE = false
9
9
  require 'UniversalDetector'
10
- $VERBOSE=true
10
+ $VERBOSE = old_verbose
11
11
 
12
12
  module Repub
13
13
  class App
@@ -101,8 +101,9 @@ module Repub
101
101
  end
102
102
 
103
103
  def for_url(&block)
104
- # if not yet cached, download stuff
105
- unless File.exist?(@path)
104
+ # Download stuff if not yet cached
105
+ cached = File.exist?(@path)
106
+ unless cached
106
107
  FileUtils.mkdir_p(@path)
107
108
  begin
108
109
  Dir.chdir(@path) { yield self }
@@ -111,32 +112,33 @@ module Repub
111
112
  raise
112
113
  end
113
114
  else
114
- log.debug "-- Already cached in #{@path}"
115
+ log.info "Using cached assets"
116
+ log.debug "-- Cache is #{@path}"
115
117
  end
116
- # do post-download tasks
117
- if File.exist?(@path)
118
- Dir.chdir(@path) do
119
- # enumerate assets
120
- @assets = {}
121
- AssetTypes.each_pair do |asset_type, file_types|
122
- @assets[asset_type] ||= []
123
- file_types.each do |file_type|
124
- @assets[asset_type] << Dir.glob("*.#{file_type}")
125
- end
126
- @assets[asset_type].flatten!
118
+ # Do post-download tasks
119
+ Dir.chdir(@path) do
120
+ # Enumerate assets
121
+ @assets = {}
122
+ AssetTypes.each_pair do |asset_type, file_types|
123
+ @assets[asset_type] ||= []
124
+ file_types.each do |file_type|
125
+ @assets[asset_type] << Dir.glob("*.#{file_type}")
127
126
  end
128
- # detect encoding and convert to utf-8 if needed
127
+ @assets[asset_type].flatten!
128
+ end
129
+ # For freshly downloaded docs, detect encoding and convert to utf-8
130
+ unless cached
129
131
  @assets[:documents].each do |doc|
130
- log.debug "-- Detecting encoding for #{doc}"
132
+ log.info "Detecting encoding for #{doc}"
131
133
  s = IO.read(doc)
132
134
  raise FetcherException, "empty document" unless s
133
- encoding = UniversalDetector::chardet(s)['encoding']
135
+ encoding = UniversalDetector.chardet(s)['encoding']
134
136
  if encoding.downcase != 'utf-8'
135
- log.debug "-- Looks like it's #{encoding}, will convert to UTF-8"
136
- s = Iconv.conv('utf-8', encoding, s)
137
+ log.info "Looks like #{encoding}, converting to UTF-8"
138
+ s = Iconv.conv('utf-8', encoding, IO.read(doc))
137
139
  File.open(doc, 'w') { |f| f.write(s) }
138
140
  else
139
- log.debug "-- Looks like it's UTF-8, no conversion needed"
141
+ log.info "Looks like UTF-8, no conversion needed"
140
142
  end
141
143
  end
142
144
  end
@@ -3,6 +3,7 @@ require 'optparse'
3
3
  module Repub
4
4
  class App
5
5
  module Options
6
+ include Logger
6
7
 
7
8
  attr_reader :options
8
9
 
@@ -91,10 +92,14 @@ module Repub
91
92
  opts.separator " Parser options:"
92
93
 
93
94
  opts.on("-x", "--selector NAME:VALUE", String,
94
- "Set parser XPath or CSS selector NAME to VALUE.",
95
+ "Set parser XPath selector NAME to VALUE.",
95
96
  "Recognized selectors are: [title toc toc_item toc_section]"
96
97
  ) do |value|
97
- name, value = value.split(/:/)
98
+ begin
99
+ name, value = value.match(/([^:]+):(.*)/)[1, 2]
100
+ rescue
101
+ log.fatal "ERROR: invalid argument: -x '#{value}'. See '#{App.name} --help'."
102
+ end
98
103
  options[:selectors][name.to_sym] = value
99
104
  end
100
105
 
@@ -103,7 +108,11 @@ module Repub
103
108
  "Valid metadata names are: [creator date description",
104
109
  "language publisher relation rights subject title]"
105
110
  ) do |value|
106
- name, value = value.split(/:/)
111
+ begin
112
+ name, value = value.match(/([^:]+):(.*)/)[1, 2]
113
+ rescue
114
+ log.fatal "ERROR: invalid argument: -m '#{value}'. See '#{App.name} --help'."
115
+ end
107
116
  options[:metadata][name.to_sym] = value
108
117
  end
109
118
 
@@ -125,7 +134,7 @@ module Repub
125
134
  ) { |value| options[:css] = File.expand_path(value) }
126
135
 
127
136
  opts.on("-X", "--remove SELECTOR", String,
128
- "Remove source element using XPath or CSS selector.",
137
+ "Remove source element using XPath selector.",
129
138
  "Use -X- to ignore stored profile."
130
139
  ) { |value| value == '-' ? options[:remove] = [] : options[:remove] << value }
131
140
 
@@ -148,15 +157,13 @@ module Repub
148
157
  begin
149
158
  parser.parse! args
150
159
  rescue OptionParser::ParseError => ex
151
- STDERR.puts "ERROR: #{ex.to_s}. See '#{App.name} --help'."
152
- exit 1
160
+ log.fatal "ERROR: #{ex.to_s}. See '#{App.name} --help'."
153
161
  end
154
162
 
155
163
  options[:url] = args.last
156
164
  if options[:url].nil? || options[:url].empty?
157
165
  help parser
158
- STDERR.puts "ERROR: Please specify an URL."
159
- exit 1
166
+ log.fatal "ERROR: Please specify an URL."
160
167
  end
161
168
  end
162
169
 
@@ -1,5 +1,5 @@
1
1
  require 'rubygems'
2
- require 'hpricot'
2
+ require 'nokogiri'
3
3
 
4
4
  module Repub
5
5
  class App
@@ -11,13 +11,13 @@ module Repub
11
11
  Parser.new(options).parse(cache)
12
12
  end
13
13
 
14
- # Default hpricot selectors
14
+ # Default selectors
15
15
  #
16
16
  Selectors = {
17
17
  :title => '//h1',
18
- :toc => '//div.toc/ul',
19
- :toc_item => '/li',
20
- :toc_section => '/ul'
18
+ :toc => '//ul',
19
+ :toc_item => './li',
20
+ :toc_section => './ul'
21
21
  }
22
22
 
23
23
  class Parser
@@ -43,7 +43,7 @@ module Repub
43
43
  @cache = cache
44
44
  @asset = @cache.assets[:documents][0]
45
45
  log.debug "-- Parsing #{@asset}"
46
- @doc = Hpricot(open(File.join(@cache.path, @asset)), @fixup)
46
+ @doc = Nokogiri::HTML.parse(open(File.join(@cache.path, @asset)), nil, 'UTF-8')
47
47
 
48
48
  @uid = @cache.name
49
49
  parse_title
@@ -64,13 +64,13 @@ module Repub
64
64
  if el.children.empty?
65
65
  title_text = el.inner_text
66
66
  else
67
- title_text = el.children.map{|c| c.inner_text }.join(' ')
67
+ title_text = el.children.map{|c| c.inner_text }.join(' ')
68
68
  end
69
69
  @title = title_text.gsub(/[\r\n]/, '').gsub(/\s+/, ' ').strip
70
70
  log.info "Found title \"#{@title}\""
71
71
  else
72
72
  @title = UNTITLED
73
- log.warn "** Could not parse document title, using '#{@title}'"
73
+ log.warn "** Could not find document title, using '#{@title}'"
74
74
  end
75
75
  end
76
76
 
@@ -80,6 +80,8 @@ module Repub
80
80
  @title_html = el ? el.inner_html.gsub(/[\r\n]/, '') : UNTITLED
81
81
  end
82
82
 
83
+ # Helper container for TOC items
84
+ #
83
85
  class TocItem < Struct.new(
84
86
  :title,
85
87
  :uri,
@@ -102,31 +104,42 @@ module Repub
102
104
 
103
105
  def parse_toc
104
106
  log.debug "-- Looking for TOC with #{@selectors[:toc]}"
105
- el = @doc.at(@selectors[:toc])
107
+ el = @doc.xpath(@selectors[:toc]).first
106
108
  if el
107
109
  @toc = parse_toc_section(el)
108
110
  log.info "Found TOC with #{@toc.size} top-level items"
109
111
  else
110
112
  @toc = []
111
- log.warn "** Could not parse document table of contents"
113
+ log.warn "** Could not find document table of contents"
112
114
  end
113
115
  end
114
116
 
115
117
  def parse_toc_section(section)
116
118
  toc = []
117
119
  log.debug "-- Looking for TOC items with #{@selectors[:toc_item]}"
118
- section.search(@selectors[:toc_item]).each do |item|
120
+ section.xpath(@selectors[:toc_item]).each do |item|
121
+ # Get item's anchor and href
119
122
  a = item.name == 'a' ? item : item.at('a')
120
- next if a.nil?
121
- href = a['href']
122
- next if href.nil?
123
- title = item.inner_text.gsub(/\s+/, ' ').strip
124
- subitems = nil
123
+ next if !a
124
+ href = a[:href]
125
+ next if !href
126
+ # Is this a leaf item or node ?
127
+ subsection = item.xpath(@selectors[:toc_section]).first
128
+ if subsection
129
+ # Item has subsection, use anchor text for title
130
+ title = a.inner_text
131
+ else
132
+ # Leaf item, glue inner_text from all children
133
+ title = item.children.map{|c| c.inner_text }.join(' ')
134
+ end
135
+ title = title.gsub(/[\r\n]/, '').gsub(/\s+/, ' ').strip
125
136
  log.debug "-- Found item: #{title}"
126
- item.search(@selectors[:toc_section]).each do |subsection|
127
- log.debug "-- Found section with #{@selectors[:toc_section]} >>>"
137
+ # Parse sub-section
138
+ if subsection
139
+ log.debug "-- Found section with #{@selectors[:toc_section]}"
140
+ log.debug "-- >"
128
141
  subitems = parse_toc_section(subsection)
129
- log.debug '-- <<<'
142
+ log.debug '-- .'
130
143
  end
131
144
  toc << TocItem.new(title, href, subitems, @asset)
132
145
  end
data/repub.gemspec ADDED
@@ -0,0 +1,48 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{repub}
5
+ s.version = "0.3.1"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["Dmitri Goutnik"]
9
+ s.date = %q{2009-06-28}
10
+ s.default_executable = %q{repub}
11
+ s.description = %q{Simple HTML to ePub converter.}
12
+ s.email = %q{dg@invisiblellama.net}
13
+ s.executables = ["repub"]
14
+ s.extra_rdoc_files = ["History.txt", "README.txt", "SAMPLES.txt", "bin/repub"]
15
+ s.files = ["History.txt", "README.txt", "Rakefile", "SAMPLES.txt", "TODO", "bin/repub", "lib/repub.rb", "lib/repub/app.rb", "lib/repub/app/builder.rb", "lib/repub/app/fetcher.rb", "lib/repub/app/logger.rb", "lib/repub/app/options.rb", "lib/repub/app/parser.rb", "lib/repub/app/profile.rb", "lib/repub/app/utility.rb", "lib/repub/epub.rb", "lib/repub/epub/container.rb", "lib/repub/epub/content.rb", "lib/repub/epub/toc.rb", "repub.gemspec", "test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
16
+ s.homepage = %q{http://github.com/invisiblellama/repub/tree/master}
17
+ s.rdoc_options = ["--main", "README.txt"]
18
+ s.require_paths = ["lib"]
19
+ s.rubyforge_project = %q{repub}
20
+ s.rubygems_version = %q{1.3.4}
21
+ s.summary = %q{Simple HTML to ePub converter}
22
+ s.test_files = ["test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
23
+
24
+ if s.respond_to? :specification_version then
25
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
26
+ s.specification_version = 3
27
+
28
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
29
+ s.add_runtime_dependency(%q<nokogiri>, [">= 1.3.2"])
30
+ s.add_runtime_dependency(%q<builder>, [">= 2.1.2"])
31
+ s.add_runtime_dependency(%q<chardet>, [">= 0.9.0"])
32
+ s.add_runtime_dependency(%q<launchy>, [">= 0.3.3"])
33
+ s.add_development_dependency(%q<bones>, [">= 2.5.1"])
34
+ else
35
+ s.add_dependency(%q<nokogiri>, [">= 1.3.2"])
36
+ s.add_dependency(%q<builder>, [">= 2.1.2"])
37
+ s.add_dependency(%q<chardet>, [">= 0.9.0"])
38
+ s.add_dependency(%q<launchy>, [">= 0.3.3"])
39
+ s.add_dependency(%q<bones>, [">= 2.5.1"])
40
+ end
41
+ else
42
+ s.add_dependency(%q<nokogiri>, [">= 1.3.2"])
43
+ s.add_dependency(%q<builder>, [">= 2.1.2"])
44
+ s.add_dependency(%q<chardet>, [">= 0.9.0"])
45
+ s.add_dependency(%q<launchy>, [">= 0.3.3"])
46
+ s.add_dependency(%q<bones>, [">= 2.5.1"])
47
+ end
48
+ end
@@ -1,13 +1,13 @@
1
1
  require 'test/unit'
2
2
  require 'rubygems'
3
- require 'hpricot'
3
+ require 'nokogiri'
4
4
  require 'repub/epub'
5
5
 
6
6
  class TestContainer < Test::Unit::TestCase
7
7
  def test_container_create
8
8
  c = Repub::Epub::Container.new
9
9
  s = c.to_xml
10
- doc = Hpricot(s)
10
+ doc = Nokogiri::HTML(s)
11
11
  #puts s
12
12
 
13
13
  assert_not_nil(doc.search('rootfile'))
@@ -1,6 +1,6 @@
1
1
  require 'test/unit'
2
2
  require 'rubygems'
3
- require 'hpricot'
3
+ require 'nokogiri'
4
4
  require 'repub/epub'
5
5
 
6
6
  class TestContent < Test::Unit::TestCase
@@ -8,7 +8,7 @@ class TestContent < Test::Unit::TestCase
8
8
  x = Repub::Epub::Content.new('some-name')
9
9
  s = x.to_xml
10
10
  #puts s
11
- doc = Hpricot(s)
11
+ doc = Nokogiri::HTML(s)
12
12
 
13
13
  # manifest was created
14
14
  assert_not_nil(doc.search('manifest'))
@@ -35,7 +35,7 @@ class TestContent < Test::Unit::TestCase
35
35
  x.add_document 'glossary.html', 'glossary'
36
36
  s = x.to_xml
37
37
  #puts s
38
- doc = Hpricot(s)
38
+ doc = Nokogiri::HTML(s)
39
39
 
40
40
  # manifest was created
41
41
  assert_not_nil(doc.search('manifest'))
@@ -1,6 +1,6 @@
1
1
  require 'test/unit'
2
2
  require 'rubygems'
3
- require 'hpricot'
3
+ require 'nokogiri'
4
4
  require 'repub/epub'
5
5
 
6
6
  class TestToc < Test::Unit::TestCase
@@ -8,7 +8,7 @@ class TestToc < Test::Unit::TestCase
8
8
  x = Repub::Epub::Toc.new('some-name')
9
9
  s = x.to_xml
10
10
  #puts s
11
- doc = Hpricot(s)
11
+ doc = Nokogiri::HTML(s)
12
12
  # TODO
13
13
  end
14
14
 
@@ -23,7 +23,7 @@ class TestToc < Test::Unit::TestCase
23
23
  p12 = p1.add_nav_point('Chapter 1-2', 'chapter-1-2.html')
24
24
  s = x.to_xml
25
25
  #puts s
26
- doc = Hpricot(s)
26
+ doc = Nokogiri::HTML(s)
27
27
  # TODO
28
28
  end
29
29
  end
data/test/test_builder.rb CHANGED
@@ -3,6 +3,6 @@ require 'repub'
3
3
 
4
4
  class TestBuilder < Test::Unit::TestCase
5
5
  def test_builder
6
- flunk("todo")
6
+ # TODO
7
7
  end
8
8
  end
data/test/test_fetcher.rb CHANGED
@@ -1,36 +1,36 @@
1
- require 'test/unit'
2
- require 'repub'
3
- require 'repub/app'
4
-
5
- class TestFetcher < Test::Unit::TestCase
6
-
7
- include Repub::App::Fetcher
8
- attr_reader :options
9
-
10
- def test_fetcher
11
- @options = {
12
- :url => 'http://www.berzinarchives.com/web/x/prn/p.html_1614431902.html',
13
- :helper => 'wget'
14
- }
15
- assert_nothing_raised do
16
- cache = fetch
17
- #p cache
18
- assert_equal('http://www.berzinarchives.com/web/x/prn/p.html_1614431902.html', cache.url)
19
- assert(cache.path.include?('.repub/cache/f963050ead9ee7775a4155e13743d47bc851d5d8'))
20
- assert_equal('f963050ead9ee7775a4155e13743d47bc851d5d8', cache.name)
21
- # assert(File.exist?(File.join(f.asset_root, f.asset_name)), "Fetch failed.")
22
- end
23
- end
24
-
25
- def test_fetcher_fail
26
- @options = {
27
- :url => 'not-existing',
28
- :helper => 'wget'
29
- }
30
- assert_raise(Repub::App::FetcherException) do
31
- cache = fetch
32
- #p cache
33
- end
34
- end
35
-
36
- end
1
+ require 'test/unit'
2
+ require 'repub'
3
+ require 'repub/app'
4
+
5
+ class TestFetcher < Test::Unit::TestCase
6
+
7
+ include Repub::App::Fetcher
8
+ attr_reader :options
9
+
10
+ def test_fetcher
11
+ @options = {
12
+ :url => 'http://www.berzinarchives.com/web/x/prn/p.html_1614431902.html',
13
+ :helper => 'wget'
14
+ }
15
+ assert_nothing_raised do
16
+ cache = fetch
17
+ #p cache
18
+ assert_equal('http://www.berzinarchives.com/web/x/prn/p.html_1614431902.html', cache.url)
19
+ assert(cache.path.include?('.repub/cache/f963050ead9ee7775a4155e13743d47bc851d5d8'))
20
+ assert_equal('f963050ead9ee7775a4155e13743d47bc851d5d8', cache.name)
21
+ # assert(File.exist?(File.join(f.asset_root, f.asset_name)), "Fetch failed.")
22
+ end
23
+ end
24
+
25
+ def test_fetcher_fail
26
+ @options = {
27
+ :url => 'not-existing',
28
+ :helper => 'wget'
29
+ }
30
+ assert_raise(Repub::App::FetcherException) do
31
+ cache = fetch
32
+ #p cache
33
+ end
34
+ end
35
+
36
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: invisiblellama-repub
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitri Goutnik
@@ -9,28 +9,28 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-26 00:00:00 -07:00
12
+ date: 2009-06-28 00:00:00 -07:00
13
13
  default_executable: repub
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
- name: builder
16
+ name: nokogiri
17
17
  type: :runtime
18
18
  version_requirement:
19
19
  version_requirements: !ruby/object:Gem::Requirement
20
20
  requirements:
21
21
  - - ">="
22
22
  - !ruby/object:Gem::Version
23
- version: 2.1.2
23
+ version: 1.3.2
24
24
  version:
25
25
  - !ruby/object:Gem::Dependency
26
- name: hpricot
26
+ name: builder
27
27
  type: :runtime
28
28
  version_requirement:
29
29
  version_requirements: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 0.8.1
33
+ version: 2.1.2
34
34
  version:
35
35
  - !ruby/object:Gem::Dependency
36
36
  name: chardet
@@ -62,7 +62,7 @@ dependencies:
62
62
  - !ruby/object:Gem::Version
63
63
  version: 2.5.1
64
64
  version:
65
- description: RePub is a simple HTML to ePub converter.
65
+ description: Simple HTML to ePub converter.
66
66
  email: dg@invisiblellama.net
67
67
  executables:
68
68
  - repub
@@ -71,15 +71,14 @@ extensions: []
71
71
  extra_rdoc_files:
72
72
  - History.txt
73
73
  - README.txt
74
- - TODO.txt
74
+ - SAMPLES.txt
75
75
  - bin/repub
76
- - lib/repub/mobi/.githidden
77
76
  files:
78
- - .gitignore
79
77
  - History.txt
80
78
  - README.txt
81
79
  - Rakefile
82
- - TODO.txt
80
+ - SAMPLES.txt
81
+ - TODO
83
82
  - bin/repub
84
83
  - lib/repub.rb
85
84
  - lib/repub/app.rb
@@ -94,7 +93,7 @@ files:
94
93
  - lib/repub/epub/container.rb
95
94
  - lib/repub/epub/content.rb
96
95
  - lib/repub/epub/toc.rb
97
- - lib/repub/mobi/.githidden
96
+ - repub.gemspec
98
97
  - test/epub/test_container.rb
99
98
  - test/epub/test_content.rb
100
99
  - test/epub/test_toc.rb
@@ -128,7 +127,7 @@ rubyforge_project: repub
128
127
  rubygems_version: 1.2.0
129
128
  signing_key:
130
129
  specification_version: 3
131
- summary: RePub is a simple HTML to ePub converter
130
+ summary: Simple HTML to ePub converter
132
131
  test_files:
133
132
  - test/epub/test_container.rb
134
133
  - test/epub/test_content.rb
data/.gitignore DELETED
@@ -1,4 +0,0 @@
1
- pkg
2
- tmp
3
- .eprj
4
- *.epub
File without changes