repub 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +9 -0
- data/README.txt +106 -0
- data/Rakefile +30 -0
- data/SAMPLES.txt +23 -0
- data/TODO +3 -0
- data/bin/repub +8 -0
- data/lib/repub.rb +46 -0
- data/lib/repub/app.rb +42 -0
- data/lib/repub/app/builder.rb +208 -0
- data/lib/repub/app/fetcher.rb +164 -0
- data/lib/repub/app/logger.rb +52 -0
- data/lib/repub/app/options.rb +180 -0
- data/lib/repub/app/parser.rb +152 -0
- data/lib/repub/app/profile.rb +91 -0
- data/lib/repub/app/utility.rb +57 -0
- data/lib/repub/epub.rb +3 -0
- data/lib/repub/epub/container.rb +28 -0
- data/lib/repub/epub/content.rb +153 -0
- data/lib/repub/epub/toc.rb +139 -0
- data/repub.gemspec +48 -0
- data/tasks/ann.rake +80 -0
- data/tasks/bones.rake +20 -0
- data/tasks/gem.rake +201 -0
- data/tasks/git.rake +40 -0
- data/tasks/notes.rake +27 -0
- data/tasks/post_load.rake +34 -0
- data/tasks/rdoc.rake +51 -0
- data/tasks/rubyforge.rake +55 -0
- data/tasks/setup.rb +292 -0
- data/tasks/spec.rake +54 -0
- data/tasks/svn.rake +47 -0
- data/tasks/test.rake +40 -0
- data/tasks/zentest.rake +36 -0
- data/test/epub/test_container.rb +15 -0
- data/test/epub/test_content.rb +56 -0
- data/test/epub/test_toc.rb +29 -0
- data/test/test_builder.rb +8 -0
- data/test/test_fetcher.rb +36 -0
- data/test/test_logger.rb +76 -0
- data/test/test_parser.rb +32 -0
- metadata +153 -0
@@ -0,0 +1,164 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'digest/sha1'
|
3
|
+
require 'uri'
|
4
|
+
require 'iconv'
|
5
|
+
require 'rubygems'
|
6
|
+
|
7
|
+
old_verbose = $VERBOSE
|
8
|
+
$VERBOSE = false
|
9
|
+
require 'UniversalDetector'
|
10
|
+
$VERBOSE = old_verbose
|
11
|
+
|
12
|
+
module Repub
|
13
|
+
class App
|
14
|
+
module Fetcher
|
15
|
+
|
16
|
+
class FetcherException < RuntimeError; end
|
17
|
+
|
18
|
+
# Runs the download step by delegating to a Fetcher built from the
# current options.
#
# Returns whatever Fetcher#fetch returns.
def fetch
  fetcher = Fetcher.new(options)
  fetcher.fetch
end
|
21
|
+
|
22
|
+
AssetTypes = {
|
23
|
+
:documents => %w[html htm],
|
24
|
+
:stylesheets => %w[css],
|
25
|
+
:images => %w[jpg jpeg png gif svg]
|
26
|
+
}
|
27
|
+
|
28
|
+
class Fetcher
|
29
|
+
include Logger
|
30
|
+
|
31
|
+
Downloaders = {
|
32
|
+
:wget => { :cmd => 'wget', :options => '-nv -E -H -k -p -nH -nd' },
|
33
|
+
:httrack => { :cmd => 'httrack', :options => '-gB -r2 +*.css +*.jpg -*.xml -*.html' }
|
34
|
+
}
|
35
|
+
|
36
|
+
# Prepares a fetcher for the given options hash.
#
# The downloader binary and its arguments come from the REPUB_DOWNLOADER
# and REPUB_DOWNLOADER_OPTIONS environment variables when they are set;
# otherwise they are taken from the Downloaders entry named by
# options[:helper].
#
# Raises FetcherException when options[:helper] does not name an entry in
# Downloaders. A FetcherException raised by #which (binary not found) is
# passed through unchanged so its message is not masked.
def initialize(options)
  @options = options
  @downloader_path, @downloader_options = ENV['REPUB_DOWNLOADER'], ENV['REPUB_DOWNLOADER_OPTIONS']

  helper = @options[:helper]
  # A helper that cannot be turned into a symbol (e.g. nil) falls back to wget.
  downloader = helper.respond_to?(:to_sym) ? Downloaders[helper.to_sym] : Downloaders[:wget]
  # An unknown name yields nil; report it here instead of letting a
  # NoMethodError escape from the hash lookups below.
  raise FetcherException, "unknown helper '#{helper}'" unless downloader

  log.debug "-- Using #{downloader[:cmd]} #{downloader[:options]}"
  @downloader_path ||= which(downloader[:cmd])
  @downloader_options ||= downloader[:options]
end
|
48
|
+
|
49
|
+
# Downloads options[:url] into its cache directory using the configured
# downloader.
#
# Returns the value of Cache.for_url.
# Raises FetcherException when the URL is empty or unparsable, or when the
# downloader fails or leaves the cache empty.
def fetch
  # Loaded here so this method does not depend on the file-level requires.
  require 'shellwords'

  url = @options[:url]
  raise FetcherException, "empty URL" if !url || url.empty?
  begin
    URI.parse(url)
  rescue
    raise FetcherException, "invalid URL: #{url}"
  end

  # Build an argument vector instead of a shell string: URLs routinely
  # contain '&', ';' or '?', which a shell would interpret. The options
  # string is still split into words (honouring quotes) as a shell would.
  argv = [@downloader_path] + Shellwords.split(@downloader_options.to_s) + [url]

  Cache.for_url(url) do |cache|
    log.debug "-- Downloading into #{cache.path}"
    downloaded = system(*argv)
    unless downloaded && !cache.empty?
      raise FetcherException, "Fetch failed."
    end
  end
end
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
# Locates the executable for +cmd+.
#
# On platforms whose RUBY_PLATFORM contains 'mswin' the name is returned
# unchanged. Elsewhere each directory in ENV['PATH'] is searched and the
# absolute path of the first executable regular file named +cmd+ is returned.
#
# Raises FetcherException ("<cmd>: helper not found.") when nothing matches.
def which(cmd)
  return cmd if RUBY_PLATFORM.match('mswin')

  search_dirs = ENV['PATH'].to_s.split(File::PATH_SEPARATOR).reject { |dir| dir.empty? }
  found = search_dirs.map { |dir| File.expand_path(cmd, dir) }.find do |candidate|
    File.file?(candidate) && File.executable?(candidate)
  end
  # Keep the requested name intact so the message says which helper is missing.
  raise FetcherException, "#{cmd}: helper not found." unless found
  found
end
|
75
|
+
end
|
76
|
+
|
77
|
+
class Cache
|
78
|
+
include Logger
|
79
|
+
|
80
|
+
# Directory under App.data_path that holds all per-URL cache directories.
def self.root
  File.join(App.data_path, 'cache')
end
|
83
|
+
|
84
|
+
def self.inventorize
|
85
|
+
# TODO
|
86
|
+
end
|
87
|
+
|
88
|
+
# Removes every non-hidden entry under the cache root.
# Best effort: any error raised along the way is ignored.
def self.cleanup
  begin
    Dir.chdir(root) do
      entries = Dir.glob('*')
      FileUtils.rm_r(entries)
    end
  rescue
    # ignore exceptions
  end
end
|
93
|
+
|
94
|
+
attr_reader :url
|
95
|
+
attr_reader :name
|
96
|
+
attr_reader :path
|
97
|
+
attr_reader :assets
|
98
|
+
|
99
|
+
# Builds the cache object for +url+ and runs its instance-level #for_url,
# passing the given block along.
def self.for_url(url, &block)
  cache = new(url)
  cache.for_url(&block)
end
|
102
|
+
|
103
|
+
# Makes sure the cache directory for this URL is populated, then indexes it.
#
# When the directory does not exist yet it is created and the block is
# yielded (with self, from inside the directory) to download into it.
# Afterwards the files are grouped into @assets by the extensions listed in
# AssetTypes, and every freshly downloaded document is converted to UTF-8
# unless it is already detected as UTF-8.
#
# If anything fails while filling a new cache (download, empty document,
# undetectable encoding, conversion), the directory is removed before the
# error is re-raised, so a later run cannot mistake a half-processed
# directory for a valid cache. An existing cache is never removed.
#
# Returns self.
def for_url(&block)
  cached = File.exist?(@path)
  begin
    if cached
      log.info "Using cached assets"
      log.debug "-- Cache is #{@path}"
    else
      # Download stuff if not yet cached
      FileUtils.mkdir_p(@path)
      Dir.chdir(@path) { yield self }
    end

    # Do post-download tasks
    Dir.chdir(@path) do
      # Enumerate assets
      @assets = {}
      AssetTypes.each_pair do |asset_type, file_types|
        @assets[asset_type] = file_types.map { |file_type| Dir.glob("*.#{file_type}") }.flatten
      end

      # For freshly downloaded docs, detect encoding and convert to utf-8
      unless cached
        @assets[:documents].each do |doc|
          log.info "Detecting encoding for #{doc}"
          s = IO.read(doc)
          # IO.read gives "" (not nil) for an empty file, so test for both.
          raise FetcherException, "empty document" if s.nil? || s.empty?
          encoding = UniversalDetector.chardet(s)['encoding']
          raise FetcherException, "unable to detect encoding of #{doc}" unless encoding
          if encoding.downcase != 'utf-8'
            log.info "Looks like #{encoding}, converting to UTF-8"
            # Convert before opening for write so a failed conversion
            # does not truncate the document.
            converted = Iconv.conv('utf-8', encoding, s)
            File.open(doc, 'w') { |f| f.write(converted) }
          else
            log.info "Looks like UTF-8, no conversion needed"
          end
        end
      end
    end
  rescue
    FileUtils.rm_rf(@path) unless cached
    raise
  end
  self
end
|
148
|
+
|
149
|
+
# True when the cache directory holds no visible entries.
#
# A missing directory counts as empty, and entries whose names start with
# '.' are ignored (matching what a '*' glob would report). The directory is
# listed directly rather than globbed so that characters such as '[', '{'
# or '*' in @path are not treated as pattern syntax.
def empty?
  return true unless File.directory?(@path)
  Dir.entries(@path).all? { |entry| entry[0, 1] == '.' }
end
|
152
|
+
|
153
|
+
private
|
154
|
+
|
155
|
+
# Derives the cache identity for +url+: the name is the SHA1 hex digest of
# the URL and the path is that name under Cache.root.
def initialize(url)
  digest = Digest::SHA1.hexdigest(url)
  @url, @name = url, digest
  @path = File.join(Cache.root, digest)
end
|
160
|
+
end
|
161
|
+
|
162
|
+
end
|
163
|
+
end
|
164
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module Repub
  class App
    # Mixin that gives including classes a #log accessor to the shared logger.
    module Logger

      # Logging verbosity
      #
      LOGGER_QUIET    = 0   # nothing except errors
      LOGGER_NORMAL   = 1   # info and above
      LOGGER_VERBOSE  = 2   # everything, including debugging noise

      # Returns the single shared Logger instance.
      def log
        Logger.instance
      end

      # Level-filtered writer. Debug and info messages go to #stdout,
      # errors and warnings go to #stderr.
      class Logger
        include Singleton

        attr_accessor :level, :stdout, :stderr

        # Written to stdout only at LOGGER_VERBOSE.
        def debug(msg)
          return unless @level >= LOGGER_VERBOSE
          @stdout.puts(msg)
        end

        # Written to stdout at LOGGER_NORMAL and above.
        def info(msg)
          return unless @level >= LOGGER_NORMAL
          @stdout.puts(msg)
        end

        # Written to stderr at LOGGER_QUIET and above.
        def error(msg)
          return unless @level >= LOGGER_QUIET
          @stderr.puts(msg)
        end
        alias_method :warn, :error

        # Logs +msg+ as an error, then terminates the process with status 1.
        def fatal(msg)
          error(msg)
          exit 1
        end

        private

        # Starts at normal verbosity, writing to the process's standard streams.
        def initialize
          @level, @stdout, @stderr = LOGGER_NORMAL, STDOUT, STDERR
        end
      end

    end
  end
end
|
@@ -0,0 +1,180 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
module Repub
|
4
|
+
class App
|
5
|
+
module Options
|
6
|
+
include Logger
|
7
|
+
|
8
|
+
attr_reader :options
|
9
|
+
|
10
|
+
# Builds @options from built-in defaults, the default profile and the
# command line in +args+.
#
# +args+ is consumed in place by OptionParser#parse!; the last remaining
# argument is taken as the URL.
#
# Process exits triggered from here (all with status 1): no arguments,
# -L, -C, -V and -h directly; an option parse error, a malformed -x/-m
# value or a missing URL through log.fatal.
def parse_options(args)

  # Default options
  @options = {
    :browser      => false,
    :css          => nil,
    :encoding     => nil,
    :fixup        => true,
    :helper       => 'wget',
    :metadata     => {},
    :output_path  => Dir.getwd,
    :profile      => 'default',
    :remove       => [],
    :rx           => [],
    # Copy the defaults: -x writes into this hash and must not alter the
    # shared Parser::Selectors constant.
    :selectors    => Parser::Selectors.dup,
    :url          => nil,
    :verbosity    => Repub::App::Logger::LOGGER_NORMAL,
  }

  # Load default profile
  if load_profile(options[:profile]).empty?
    write_profile(options[:profile])
  end

  # Parse command line
  parser = OptionParser.new do |opts|
    # The heredoc body is literal banner text, so it stays flush left.
    opts.banner = <<-BANNER.gsub(/^ /,'')

Repub is a simple HTML to ePub converter.

Usage: #{App.name} [options] url

General options:
    BANNER

    opts.on("-D", "--downloader NAME ", ['wget', 'httrack'],
      "Which downloader to use to get files (wget or httrack).",
      "Default is #{options[:helper]}."
    ) { |value| options[:helper] = value }

    opts.on("-o", "--output PATH", String,
      "Output path for generated ePub file.",
      "Default is #{options[:output_path]}/<Parsed_Title>.epub"
    ) { |value| options[:output_path] = File.expand_path(value) }

    opts.on("-w", "--write-profile NAME", String,
      "Save given options for later reuse as profile NAME."
    ) { |value| options[:profile] = value; write_profile(value) }

    opts.on("-l", "--load-profile NAME", String,
      "Load options from saved profile NAME."
    ) { |value| options[:profile] = value; load_profile(value) }

    opts.on("-W", "--write-default",
      "Save given options for later reuse as default profile."
    ) { write_profile }

    opts.on("-L", "--list-profiles",
      "List saved profiles."
    ) { list_profiles; exit 1 }

    opts.on("-C", "--cleanup",
      "Clean up download cache."
    ) { Fetcher::Cache.cleanup; exit 1 }

    opts.on("-v", "--verbose",
      "Turn on verbose output."
    ) { options[:verbosity] = Repub::App::Logger::LOGGER_VERBOSE }

    opts.on("-q", "--quiet",
      "Turn off any output except errors."
    ) { options[:verbosity] = Repub::App::Logger::LOGGER_QUIET }

    opts.on("-V", "--version",
      "Show version."
    ) { puts Repub.version; exit 1 }

    opts.on("-h", "--help",
      "Show this help message."
    ) { help opts; exit 1 }

    opts.separator ""
    opts.separator " Parser options:"

    opts.on("-x", "--selector NAME:VALUE", String,
      "Set parser XPath selector NAME to VALUE.",
      "Recognized selectors are: [title toc toc_item toc_section]"
    ) do |value|
      begin
        # A value without a ':' makes match return nil; the resulting
        # error is turned into a fatal usage message.
        name, value = value.match(/([^:]+):(.*)/)[1, 2]
      rescue
        log.fatal "ERROR: invalid argument: -x '#{value}'. See '#{App.name} --help'."
      end
      options[:selectors][name.to_sym] = value
    end

    opts.on("-m", "--meta NAME:VALUE", String,
      "Set publication information metadata NAME to VALUE.",
      "Valid metadata names are: [creator date description",
      "language publisher relation rights subject title]"
    ) do |value|
      begin
        name, value = value.match(/([^:]+):(.*)/)[1, 2]
      rescue
        log.fatal "ERROR: invalid argument: -m '#{value}'. See '#{App.name} --help'."
      end
      options[:metadata][name.to_sym] = value
    end

    opts.on("-F", "--no-fixup",
      "Do not attempt to make document meet XHTML 1.0 Strict.",
      "Default is to try and fix things that are broken. "
    ) { |value| options[:fixup] = false }

    opts.on("-e", "--encoding NAME", String,
      "Set source document encoding. Default is to autodetect."
    ) { |value| options[:encoding] = value }

    opts.separator ""
    opts.separator " Post-processing options:"

    opts.on("-s", "--stylesheet PATH", String,
      "Use custom stylesheet at PATH to add or override existing",
      "CSS references in the source document."
    ) { |value| options[:css] = File.expand_path(value) }

    opts.on("-X", "--remove SELECTOR", String,
      "Remove source element using XPath selector.",
      "Use -X- to ignore stored profile."
    ) { |value| value == '-' ? options[:remove] = [] : options[:remove] << value }

    opts.on("-R", "--rx /PATTERN/REPLACEMENT/", String,
      "Edit source HTML using regular expressions.",
      "Use -R- to ignore stored profile."
    ) { |value| value == '-' ? options[:rx] = [] : options[:rx] << value }

    opts.on("-B", "--browse",
      "After processing, open resulting HTML in default browser."
    ) { |value| options[:browser] = true }

  end

  if args.empty?
    help parser
    exit 1
  end

  begin
    parser.parse! args
  rescue OptionParser::ParseError => ex
    log.fatal "ERROR: #{ex.to_s}. See '#{App.name} --help'."
  end

  # Whatever is left last on the command line is the URL.
  options[:url] = args.last
  if options[:url].nil? || options[:url].empty?
    help parser
    log.fatal "ERROR: Please specify an URL."
  end
end
|
169
|
+
|
170
|
+
# Prints the option summary in +opts+, followed by the name and the dumped
# contents of the currently selected profile.
def help(opts)
  profile = options[:profile]
  puts opts
  puts
  puts " Current profile (#{profile}):"
  dump_profile(profile)
  puts
end
|
177
|
+
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module Repub
|
5
|
+
class App
|
6
|
+
module Parser
|
7
|
+
|
8
|
+
class ParserException < RuntimeError; end
|
9
|
+
|
10
|
+
# Runs the parsing step on +cache+ by delegating to a Parser built from
# the current options.
#
# Returns whatever Parser#parse returns.
def parse(cache)
  parser = Parser.new(options)
  parser.parse(cache)
end
|
13
|
+
|
14
|
+
# Default selectors
|
15
|
+
#
|
16
|
+
Selectors = {
|
17
|
+
:title => '//h1',
|
18
|
+
:toc => '//ul',
|
19
|
+
:toc_item => './li',
|
20
|
+
:toc_section => './ul'
|
21
|
+
}
|
22
|
+
|
23
|
+
class Parser
|
24
|
+
include Logger
|
25
|
+
|
26
|
+
attr_reader :cache
|
27
|
+
attr_reader :uid
|
28
|
+
attr_reader :title
|
29
|
+
attr_reader :title_html
|
30
|
+
attr_reader :toc
|
31
|
+
|
32
|
+
# Remembers the parser settings from +options+: the :fixup flag and the
# selector set, which falls back to the default Selectors when
# options[:selectors] is not set.
def initialize(options)
  chosen_selectors = options[:selectors]
  @selectors = chosen_selectors || Selectors
  @fixup = options[:fixup]
end
|
36
|
+
|
37
|
+
# Parses the single HTML document held in +cache+ and extracts the uid,
# title, title markup and table of contents.
#
# Returns self.
# Raises ParserException when the cache holds no document, or more than one.
def parse(cache)
  documents = cache.assets[:documents]
  raise ParserException, "No HTML document found" if documents.empty?
  raise ParserException, "More than one HTML document found, this is not supported (yet)" if documents.size > 1

  @cache = cache
  @asset = documents[0]
  log.debug "-- Parsing #{@asset}"
  # Block form of File.open so the handle is closed once parsing is done.
  File.open(File.join(@cache.path, @asset)) do |file|
    @doc = Nokogiri::HTML.parse(file, nil, 'UTF-8')
  end

  @uid = @cache.name
  parse_title
  parse_title_html
  parse_toc

  self
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
UNTITLED = 'Untitled'
|
59
|
+
|
60
|
+
# Sets @title from the first element matching the :title selector, or to
# UNTITLED (with a warning) when nothing matches.
#
# The text of the element's children is joined with spaces, and every run
# of whitespace, line breaks included, is collapsed to a single space, so
# words separated only by a line break in the source stay separate.
def parse_title
  log.debug "-- Looking for title with #{@selectors[:title]}"
  el = @doc.at(@selectors[:title])
  if el
    if el.children.empty?
      title_text = el.inner_text
    else
      title_text = el.children.map { |c| c.inner_text }.join(' ')
    end
    @title = title_text.gsub(/\s+/, ' ').strip
    log.info "Found title \"#{@title}\""
  else
    @title = UNTITLED
    log.warn "** Could not find document title, using '#{@title}'"
  end
end
|
76
|
+
|
77
|
+
# Sets @title_html to the inner markup of the first element matching the
# :title selector, or to UNTITLED when nothing matches.
#
# Each run of line breaks becomes a single space rather than being
# deleted, so words separated only by a line break are not glued together.
def parse_title_html
  log.debug "-- Looking for html title with #{@selectors[:title]}"
  el = @doc.at(@selectors[:title])
  @title_html = el ? el.inner_html.gsub(/[\r\n]+/, ' ') : UNTITLED
end
|
82
|
+
|
83
|
+
# Helper container for TOC items
#
# Holds an item's title, the document it points at (uri), the optional
# fragment id inside that document, and any nested sub-items.
#
class TocItem < Struct.new(
    :title,
    :uri,
    :fragment_id
  )

  # title::                display text of the item
  # uri_with_fragment_id:: href of the item, e.g. "page.html#id" or "#id"
  # subitems::             nested TocItems, or nil for none
  # asset::                document name used when the href names no document
  def initialize(title, uri_with_fragment_id, subitems, asset)
    self.title = title
    self.uri, self.fragment_id = uri_with_fragment_id.to_s.split(/#/)
    # An empty href splits into no parts at all, leaving uri nil; both that
    # and a bare "#fragment" href refer to the current asset.
    self.uri = asset if uri.nil? || uri.empty?
    @subitems = subitems || []
  end

  attr_reader :subitems

  # Link target in "uri#fragment_id" form.
  def src
    "#{uri}##{fragment_id}"
  end
end
|
104
|
+
|
105
|
+
# Sets @toc from the first node matching the :toc selector, or to an empty
# list (with a warning) when no such node exists.
def parse_toc
  toc_selector = @selectors[:toc]
  log.debug "-- Looking for TOC with #{toc_selector}"
  root = @doc.xpath(toc_selector).first
  if root.nil?
    @toc = []
    log.warn "** Could not find document table of contents"
  else
    @toc = parse_toc_section(root)
    log.info "Found TOC with #{@toc.size} top-level items"
  end
end
|
116
|
+
|
117
|
+
# Builds the list of TocItems for one TOC +section+ node, recursing into
# nested sections.
#
# Items are the nodes matching the :toc_item selector; those without an
# anchor, or whose anchor has no href, are skipped. In titles every run of
# whitespace, line breaks included, is collapsed to a single space, so
# words separated only by a line break in the source stay separate.
#
# Returns an Array of TocItem.
def parse_toc_section(section)
  toc = []
  log.debug "-- Looking for TOC items with #{@selectors[:toc_item]}"
  section.xpath(@selectors[:toc_item]).each do |item|
    # Get item's anchor and href
    a = item.name == 'a' ? item : item.at('a')
    next if !a
    href = a[:href]
    next if !href

    # Is this a leaf item or node ?
    subsection = item.xpath(@selectors[:toc_section]).first
    if subsection
      # Item has subsection, use anchor text for title
      title = a.inner_text
    else
      # Leaf item, glue inner_text from all children
      title = item.children.map { |c| c.inner_text }.join(' ')
    end
    title = title.gsub(/\s+/, ' ').strip
    log.debug "-- Found item: #{title}"

    # Parse sub-section
    subitems = nil
    if subsection
      log.debug "-- Found section with #{@selectors[:toc_section]}"
      log.debug "-- >"
      subitems = parse_toc_section(subsection)
      log.debug '-- .'
    end
    toc << TocItem.new(title, href, subitems, @asset)
  end
  toc
end
|
148
|
+
end
|
149
|
+
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|