repub 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +9 -0
- data/README.txt +106 -0
- data/Rakefile +30 -0
- data/SAMPLES.txt +23 -0
- data/TODO +3 -0
- data/bin/repub +8 -0
- data/lib/repub.rb +46 -0
- data/lib/repub/app.rb +42 -0
- data/lib/repub/app/builder.rb +208 -0
- data/lib/repub/app/fetcher.rb +164 -0
- data/lib/repub/app/logger.rb +52 -0
- data/lib/repub/app/options.rb +180 -0
- data/lib/repub/app/parser.rb +152 -0
- data/lib/repub/app/profile.rb +91 -0
- data/lib/repub/app/utility.rb +57 -0
- data/lib/repub/epub.rb +3 -0
- data/lib/repub/epub/container.rb +28 -0
- data/lib/repub/epub/content.rb +153 -0
- data/lib/repub/epub/toc.rb +139 -0
- data/repub.gemspec +48 -0
- data/tasks/ann.rake +80 -0
- data/tasks/bones.rake +20 -0
- data/tasks/gem.rake +201 -0
- data/tasks/git.rake +40 -0
- data/tasks/notes.rake +27 -0
- data/tasks/post_load.rake +34 -0
- data/tasks/rdoc.rake +51 -0
- data/tasks/rubyforge.rake +55 -0
- data/tasks/setup.rb +292 -0
- data/tasks/spec.rake +54 -0
- data/tasks/svn.rake +47 -0
- data/tasks/test.rake +40 -0
- data/tasks/zentest.rake +36 -0
- data/test/epub/test_container.rb +15 -0
- data/test/epub/test_content.rb +56 -0
- data/test/epub/test_toc.rb +29 -0
- data/test/test_builder.rb +8 -0
- data/test/test_fetcher.rb +36 -0
- data/test/test_logger.rb +76 -0
- data/test/test_parser.rb +32 -0
- metadata +153 -0
@@ -0,0 +1,164 @@
|
|
1
|
+
require 'fileutils'
require 'digest/sha1'
require 'shellwords'
require 'uri'
require 'iconv'
require 'rubygems'
|
6
|
+
|
7
|
+
old_verbose = $VERBOSE
|
8
|
+
$VERBOSE = false
|
9
|
+
require 'UniversalDetector'
|
10
|
+
$VERBOSE = old_verbose
|
11
|
+
|
12
|
+
module Repub
|
13
|
+
class App
|
14
|
+
module Fetcher
|
15
|
+
|
16
|
+
class FetcherException < RuntimeError; end
|
17
|
+
|
18
|
+
# Fetches the document named in the application options.
#
# Builds a Fetcher from +options+ and returns the result of its #fetch.
def fetch
  fetcher = Fetcher.new(options)
  fetcher.fetch
end
|
21
|
+
|
22
|
+
AssetTypes = {
|
23
|
+
:documents => %w[html htm],
|
24
|
+
:stylesheets => %w[css],
|
25
|
+
:images => %w[jpg jpeg png gif svg]
|
26
|
+
}
|
27
|
+
|
28
|
+
class Fetcher
|
29
|
+
include Logger
|
30
|
+
|
31
|
+
Downloaders = {
|
32
|
+
:wget => { :cmd => 'wget', :options => '-nv -E -H -k -p -nH -nd' },
|
33
|
+
:httrack => { :cmd => 'httrack', :options => '-gB -r2 +*.css +*.jpg -*.xml -*.html' }
|
34
|
+
}
|
35
|
+
|
36
|
+
# Prepares a fetcher for the given application options.
#
# options - options hash; :helper names an entry of Downloaders
#           (wget is used when it is nil or empty).
#
# The executable and its arguments may be overridden through the
# REPUB_DOWNLOADER and REPUB_DOWNLOADER_OPTIONS environment variables;
# anything not overridden comes from the selected Downloaders entry.
#
# Raises FetcherException when the helper name is not listed in
# Downloaders, or (from #which) when its executable cannot be located.
def initialize(options)
  @options = options
  @downloader_path = ENV['REPUB_DOWNLOADER']
  @downloader_options = ENV['REPUB_DOWNLOADER_OPTIONS']

  helper = @options[:helper].to_s
  downloader = helper.empty? ? Downloaders[:wget] : Downloaders[helper.to_sym]
  # A hash lookup with an unknown name yields nil rather than raising, so
  # the unknown-helper case has to be detected explicitly.
  raise FetcherException, "unknown helper '#{helper}'" unless downloader

  log.debug "-- Using #{downloader[:cmd]} #{downloader[:options]}"
  # `which` is only consulted when no explicit path was supplied.
  @downloader_path ||= which(downloader[:cmd])
  @downloader_options ||= downloader[:options]
end
|
48
|
+
|
49
|
+
# Downloads the page named by options[:url] into the cache.
#
# The helper is started directly with an argument vector instead of a
# shell command line, so characters such as '&', ';' or '?' in the URL
# reach the helper verbatim and are never interpreted as shell syntax.
# The configured helper options are split with shell-like quoting rules.
#
# Returns the result of Cache.for_url for the URL.
# Raises FetcherException when the URL is missing or cannot be parsed,
# when the helper reports failure, or when nothing was downloaded.
def fetch
  url = @options[:url]
  raise FetcherException, "empty URL" if !url || url.empty?
  begin
    URI.parse(url)
  rescue
    raise FetcherException, "invalid URL: #{url}"
  end
  command = [@downloader_path, *Shellwords.shellwords(@downloader_options.to_s)] << url
  Cache.for_url(url) do |cache|
    log.debug "-- Downloading into #{cache.path}"
    unless system(*command) && !cache.empty?
      raise FetcherException, "Fetch failed."
    end
  end
end
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
# Locates the executable for a helper command.
#
# cmd - helper name (e.g. 'wget') or a path to the executable.
#
# When RUBY_PLATFORM contains 'mswin' the name is returned untouched.
# Otherwise a name containing a path separator is checked as given, and a
# bare name is looked up in the directories listed in ENV['PATH'] (empty
# PATH entries are skipped). No external program or shell is involved.
#
# Returns the path of the first regular, executable match.
# Raises FetcherException naming the requested helper when none is found.
def which(cmd)
  return cmd if RUBY_PLATFORM.match('mswin')

  candidates =
    if cmd.include?(File::SEPARATOR)
      [cmd]
    else
      dirs = ENV['PATH'].to_s.split(File::PATH_SEPARATOR).reject { |dir| dir.empty? }
      dirs.map { |dir| File.join(dir, cmd) }
    end
  found = candidates.find { |path| File.file?(path) && File.executable?(path) }
  # The lookup result lives in its own variable so the message can still
  # name the helper that was asked for.
  raise FetcherException, "#{cmd}: helper not found." unless found
  found
end
|
75
|
+
end
|
76
|
+
|
77
|
+
class Cache
|
78
|
+
include Logger
|
79
|
+
|
80
|
+
# Directory below the application data path that holds all cached downloads.
def self.root
  File.join(App.data_path, 'cache')
end
|
83
|
+
|
84
|
+
# Placeholder for a cache inventory; not implemented, returns nil.
def self.inventorize
  # TODO
  nil
end
|
87
|
+
|
88
|
+
# Removes every non-hidden entry inside the cache root, keeping the root
# directory itself. Best effort: any StandardError (for instance a missing
# cache root) is swallowed and nil is returned.
def self.cleanup
  begin
    Dir.chdir(root) do
      entries = Dir.glob('*')
      FileUtils.rm_r(entries)
    end
  rescue StandardError
    # ignore exceptions
    nil
  end
end
|
93
|
+
|
94
|
+
attr_reader :url
|
95
|
+
attr_reader :name
|
96
|
+
attr_reader :path
|
97
|
+
attr_reader :assets
|
98
|
+
|
99
|
+
# Convenience entry point: creates the cache entry for +url+ and runs the
# instance-level #for_url on it, passing the caller's block along.
def self.for_url(url, &block)
  entry = new(url)
  entry.for_url(&block)
end
|
102
|
+
|
103
|
+
# Makes sure the assets for this URL are available in the cache directory.
#
# If the directory already exists it is reused and only catalogued.
# Otherwise it is created, the block is called with this cache (with the
# directory as working directory) to download into it, the result is
# catalogued into #assets and every HTML document is converted to UTF-8.
# If any of those steps raises, the directory is removed again so that a
# half-processed download is never mistaken for a valid cache later, and
# the error is re-raised.
#
# Returns self.
# Raises FetcherException for an empty downloaded document.
def for_url(&block)
  if File.exist?(@path)
    log.info "Using cached assets"
    log.debug "-- Cache is #{@path}"
    Dir.chdir(@path) { @assets = enumerate_assets }
  else
    FileUtils.mkdir_p(@path)
    begin
      Dir.chdir(@path) do
        yield self
        @assets = enumerate_assets
        @assets[:documents].each { |doc| convert_to_utf8(doc) }
      end
    rescue
      FileUtils.rm_r(@path)
      raise
    end
  end
  self
end

# Groups the files of the current working directory by asset type, using
# the extensions listed in AssetTypes. Returns a hash of type => file names.
def enumerate_assets
  assets = {}
  AssetTypes.each_pair do |asset_type, file_types|
    assets[asset_type] = file_types.map { |file_type| Dir.glob("*.#{file_type}") }.flatten
  end
  assets
end

# Detects the encoding of +doc+ and rewrites the file as UTF-8 when needed.
# A document whose encoding cannot be detected is left unchanged.
def convert_to_utf8(doc)
  log.info "Detecting encoding for #{doc}"
  s = IO.read(doc)
  # IO.read gives "" (not nil) for a zero-length file, so test both.
  raise FetcherException, "empty document" if s.nil? || s.empty?
  encoding = UniversalDetector.chardet(s)['encoding']
  if encoding.nil?
    log.warn "** Could not detect encoding for #{doc}, leaving it unchanged"
  elsif encoding.downcase != 'utf-8'
    log.info "Looks like #{encoding}, converting to UTF-8"
    converted = Iconv.conv('utf-8', encoding, s)
    File.open(doc, 'w') { |f| f.write(converted) }
  else
    log.info "Looks like UTF-8, no conversion needed"
  end
end

private :enumerate_assets, :convert_to_utf8
|
148
|
+
|
149
|
+
# True when the cache directory has no entries matching '*'
# (which does not match hidden dot-files).
def empty?
  pattern = File.join(@path, '*')
  Dir.glob(pattern).length.zero?
end
|
152
|
+
|
153
|
+
private
|
154
|
+
|
155
|
+
# Sets up the cache entry for +url+: the entry name is the SHA1 hex digest
# of the URL, and the entry lives in a directory of that name below
# Cache.root.
def initialize(url)
  @url = url
  @name = Digest::SHA1.hexdigest(url)
  @path = File.join(Cache.root, @name)
end
|
160
|
+
end
|
161
|
+
|
162
|
+
end
|
163
|
+
end
|
164
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module Repub
|
4
|
+
class App
|
5
|
+
module Logger
|
6
|
+
|
7
|
+
# Logging verbosity
|
8
|
+
#
|
9
|
+
LOGGER_QUIET = 0 # nothing except errors
|
10
|
+
LOGGER_NORMAL = 1 # info and above
|
11
|
+
LOGGER_VERBOSE = 2 # everything, including debugging noise
|
12
|
+
|
13
|
+
# Accessor mixed into including classes: returns the shared Logger
# singleton instance.
def log
  shared = Logger.instance
  shared
end
|
16
|
+
|
17
|
+
# Singleton logger writing to configurable output streams.
#
# level  - current verbosity, one of the LOGGER_* constants (starts at
#          LOGGER_NORMAL)
# stdout - stream receiving debug and info messages (starts as STDOUT)
# stderr - stream receiving error/warn/fatal messages (starts as STDERR)
class Logger
  include Singleton

  attr_accessor :level, :stdout, :stderr

  # Writes +msg+ to stdout when the level is at least LOGGER_VERBOSE.
  def debug(msg)
    return nil unless @level >= LOGGER_VERBOSE
    @stdout.puts(msg)
  end

  # Writes +msg+ to stdout when the level is at least LOGGER_NORMAL.
  def info(msg)
    return nil unless @level >= LOGGER_NORMAL
    @stdout.puts(msg)
  end

  # Writes +msg+ to stderr when the level is at least LOGGER_QUIET.
  def error(msg)
    return nil unless @level >= LOGGER_QUIET
    @stderr.puts(msg)
  end
  alias_method :warn, :error

  # Reports +msg+ as an error and terminates the process with status 1.
  def fatal(msg)
    error(msg)
    exit 1
  end

  private

  # Starts at normal verbosity on the standard process streams.
  def initialize
    @stderr = STDERR
    @stdout = STDOUT
    @level = LOGGER_NORMAL
  end
end
|
49
|
+
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,180 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
module Repub
|
4
|
+
class App
|
5
|
+
module Options
|
6
|
+
include Logger
|
7
|
+
|
8
|
+
attr_reader :options
|
9
|
+
|
10
|
+
# Parses command-line arguments into the options hash (see #options).
#
# args - array of command-line arguments. Recognized switches are removed
#        from it in place (OptionParser#parse!); the last remaining
#        element is taken as the URL.
#
# Processing starts from the built-in defaults, loads the default profile
# (writing it out when the loaded profile is empty) and then applies the
# switches in command-line order. -L, -C, -V and -h act immediately and
# exit; an empty argument list prints help and exits; invalid input and a
# missing URL are reported through log.fatal.
#
# NOTE(review): the banner heredoc and separator strings are kept exactly
# as they appear in the reviewed source; confirm their leading whitespace
# against the original file.
def parse_options(args)

  # Default options
  @options = {
    :browser      => false,
    :css          => nil,
    :encoding     => nil,
    :fixup        => true,
    :helper       => 'wget',
    :metadata     => {},
    :output_path  => Dir.getwd,
    :profile      => 'default',
    :remove       => [],
    :rx           => [],
    # Private copy: -x overrides must not modify the shared
    # Parser::Selectors defaults.
    :selectors    => Parser::Selectors.dup,
    :url          => nil,
    :verbosity    => Repub::App::Logger::LOGGER_NORMAL,
  }

  # Load default profile
  if load_profile(options[:profile]).empty?
    write_profile(options[:profile])
  end

  # Parse command line
  parser = OptionParser.new do |opts|
    opts.banner = <<-BANNER.gsub(/^ /,'')

Repub is a simple HTML to ePub converter.

Usage: #{App.name} [options] url

General options:
    BANNER

    opts.on("-D", "--downloader NAME ", ['wget', 'httrack'],
      "Which downloader to use to get files (wget or httrack).",
      "Default is #{options[:helper]}."
    ) { |value| options[:helper] = value }

    opts.on("-o", "--output PATH", String,
      "Output path for generated ePub file.",
      "Default is #{options[:output_path]}/<Parsed_Title>.epub"
    ) { |value| options[:output_path] = File.expand_path(value) }

    opts.on("-w", "--write-profile NAME", String,
      "Save given options for later reuse as profile NAME."
    ) { |value| options[:profile] = value; write_profile(value) }

    opts.on("-l", "--load-profile NAME", String,
      "Load options from saved profile NAME."
    ) { |value| options[:profile] = value; load_profile(value) }

    opts.on("-W", "--write-default",
      "Save given options for later reuse as default profile."
    ) { write_profile }

    opts.on("-L", "--list-profiles",
      "List saved profiles."
    ) { list_profiles; exit 1 }

    opts.on("-C", "--cleanup",
      "Clean up download cache."
    ) { Fetcher::Cache.cleanup; exit 1 }

    opts.on("-v", "--verbose",
      "Turn on verbose output."
    ) { options[:verbosity] = Repub::App::Logger::LOGGER_VERBOSE }

    opts.on("-q", "--quiet",
      "Turn off any output except errors."
    ) { options[:verbosity] = Repub::App::Logger::LOGGER_QUIET }

    opts.on("-V", "--version",
      "Show version."
    ) { puts Repub.version; exit 1 }

    opts.on("-h", "--help",
      "Show this help message."
    ) { help opts; exit 1 }

    opts.separator ""
    opts.separator " Parser options:"

    opts.on("-x", "--selector NAME:VALUE", String,
      "Set parser XPath selector NAME to VALUE.",
      "Recognized selectors are: [title toc toc_item toc_section]"
    ) do |value|
      name, selector = split_name_value('-x', value)
      options[:selectors][name.to_sym] = selector
    end

    opts.on("-m", "--meta NAME:VALUE", String,
      "Set publication information metadata NAME to VALUE.",
      "Valid metadata names are: [creator date description",
      "language publisher relation rights subject title]"
    ) do |value|
      name, meta = split_name_value('-m', value)
      options[:metadata][name.to_sym] = meta
    end

    opts.on("-F", "--no-fixup",
      "Do not attempt to make document meet XHTML 1.0 Strict.",
      "Default is to try and fix things that are broken. "
    ) { options[:fixup] = false }

    opts.on("-e", "--encoding NAME", String,
      "Set source document encoding. Default is to autodetect."
    ) { |value| options[:encoding] = value }

    opts.separator ""
    opts.separator " Post-processing options:"

    opts.on("-s", "--stylesheet PATH", String,
      "Use custom stylesheet at PATH to add or override existing",
      "CSS references in the source document."
    ) { |value| options[:css] = File.expand_path(value) }

    opts.on("-X", "--remove SELECTOR", String,
      "Remove source element using XPath selector.",
      "Use -X- to ignore stored profile."
    ) { |value| value == '-' ? options[:remove] = [] : options[:remove] << value }

    opts.on("-R", "--rx /PATTERN/REPLACEMENT/", String,
      "Edit source HTML using regular expressions.",
      "Use -R- to ignore stored profile."
    ) { |value| value == '-' ? options[:rx] = [] : options[:rx] << value }

    opts.on("-B", "--browse",
      "After processing, open resulting HTML in default browser."
    ) { options[:browser] = true }

  end

  if args.empty?
    help parser
    exit 1
  end

  begin
    parser.parse! args
  rescue OptionParser::ParseError => ex
    log.fatal "ERROR: #{ex.to_s}. See '#{App.name} --help'."
  end

  options[:url] = args.last
  if options[:url].nil? || options[:url].empty?
    help parser
    log.fatal "ERROR: Please specify an URL."
  end
end

# Splits a NAME:VALUE switch argument at its first colon.
#
# switch - the switch being parsed ('-x' or '-m'), used in the error text
# arg    - the raw argument string
#
# Returns [name, value]; reports malformed input through log.fatal.
def split_name_value(switch, arg)
  match = arg.match(/([^:]+):(.*)/)
  log.fatal "ERROR: invalid argument: #{switch} '#{arg}'. See '#{App.name} --help'." unless match
  match[1, 2]
end
private :split_name_value
|
169
|
+
|
170
|
+
# Prints the usage text, then a dump of the currently selected profile,
# each followed by a blank line.
#
# opts - the object holding the usage text (printed with puts)
def help(opts)
  profile = options[:profile]
  puts opts
  puts
  puts " Current profile (#{profile}):"
  dump_profile(profile)
  puts
end
|
177
|
+
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module Repub
|
5
|
+
class App
|
6
|
+
module Parser
|
7
|
+
|
8
|
+
class ParserException < RuntimeError; end
|
9
|
+
|
10
|
+
# Parses the downloaded document held in +cache+ using the application
# options. Returns the result of Parser#parse.
def parse(cache)
  parser = Parser.new(options)
  parser.parse(cache)
end
|
13
|
+
|
14
|
+
# Default selectors
|
15
|
+
#
|
16
|
+
Selectors = {
|
17
|
+
:title => '//h1',
|
18
|
+
:toc => '//ul',
|
19
|
+
:toc_item => './li',
|
20
|
+
:toc_section => './ul'
|
21
|
+
}
|
22
|
+
|
23
|
+
class Parser
|
24
|
+
include Logger
|
25
|
+
|
26
|
+
attr_reader :cache
|
27
|
+
attr_reader :uid
|
28
|
+
attr_reader :title
|
29
|
+
attr_reader :title_html
|
30
|
+
attr_reader :toc
|
31
|
+
|
32
|
+
# Creates a parser from the application options.
#
# options - hash; :selectors supplies the XPath selectors (the Selectors
#           defaults are used when it is nil or false) and :fixup is
#           stored as given.
def initialize(options)
  @fixup = options[:fixup]
  @selectors = options[:selectors] || Selectors
end
|
36
|
+
|
37
|
+
# Parses the single HTML document held in the cache.
#
# cache - the download cache; its assets[:documents] must list exactly one
#         document.
#
# Reads the document as UTF-8 HTML, takes the cache name as uid and then
# extracts the title, the HTML title and the table of contents.
#
# Returns self.
# Raises ParserException when the cache holds no document or more than one.
def parse(cache)
  documents = cache.assets[:documents]
  raise ParserException, "No HTML document found" if documents.empty?
  raise ParserException, "More than one HTML document found, this is not supported (yet)" if documents.size > 1

  @cache = cache
  @asset = documents[0]
  log.debug "-- Parsing #{@asset}"
  # Block form of File.open so the handle is closed once parsing is done.
  @doc = File.open(File.join(@cache.path, @asset)) do |io|
    Nokogiri::HTML.parse(io, nil, 'UTF-8')
  end

  @uid = @cache.name
  parse_title
  parse_title_html
  parse_toc

  self
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
UNTITLED = 'Untitled'
|
59
|
+
|
60
|
+
# Extracts the plain-text title using the :title selector.
#
# The text of the element's children is joined with spaces (or the
# element's own text is used when it has no children), line breaks are
# removed and whitespace runs are collapsed. Falls back to UNTITLED, with
# a warning, when the selector matches nothing.
def parse_title
  selector = @selectors[:title]
  log.debug "-- Looking for title with #{selector}"
  node = @doc.at(selector)
  if node.nil?
    @title = UNTITLED
    log.warn "** Could not find document title, using '#{@title}'"
  else
    parts = node.children
    raw = parts.empty? ? node.inner_text : parts.map { |child| child.inner_text }.join(' ')
    @title = raw.gsub(/[\r\n]/, '').gsub(/\s+/, ' ').strip
    log.info "Found title \"#{@title}\""
  end
end
|
76
|
+
|
77
|
+
# Extracts the title markup using the :title selector: the element's inner
# HTML with line breaks removed, or UNTITLED when nothing matches.
def parse_title_html
  selector = @selectors[:title]
  log.debug "-- Looking for html title with #{selector}"
  node = @doc.at(selector)
  @title_html =
    if node
      node.inner_html.gsub(/[\r\n]/, '')
    else
      UNTITLED
    end
end
|
82
|
+
|
83
|
+
# Helper container for TOC items
#
# title       - display text of the entry
# uri         - document the entry points to
# fragment_id - anchor inside that document, nil when the href has none
# subitems    - nested items (an empty array when none were given)
#
class TocItem < Struct.new(
    :title,
    :uri,
    :fragment_id
  )

  # title                - display text of the entry
  # uri_with_fragment_id - href in the form 'uri#fragment'; either part
  #                        may be missing
  # subitems             - nested items or nil
  # asset                - name of the current document, used as uri when
  #                        the href has no document part
  def initialize(title, uri_with_fragment_id, subitems, asset)
    self.title = title
    self.uri, self.fragment_id = uri_with_fragment_id.split(/#/)
    # split yields no uri at all for '' and '#', and an empty one for
    # '#fragment'; all of these refer to the current document.
    self.uri = asset if uri.nil? || uri.empty?
    @subitems = subitems || []
  end

  attr_reader :subitems

  # Link target in the form 'uri#fragment_id'.
  def src
    "#{uri}##{fragment_id}"
  end
end
|
104
|
+
|
105
|
+
# Builds the table of contents from the first node matching the :toc
# selector; uses an empty list, with a warning, when nothing matches.
def parse_toc
  selector = @selectors[:toc]
  log.debug "-- Looking for TOC with #{selector}"
  root = @doc.xpath(selector).first
  if root.nil?
    @toc = []
    log.warn "** Could not find document table of contents"
  else
    @toc = parse_toc_section(root)
    log.info "Found TOC with #{@toc.size} top-level items"
  end
end
|
116
|
+
|
117
|
+
# Converts one TOC section node into an array of TocItems, recursing into
# nested sections.
#
# section - node whose items are found with the :toc_item selector
#
# Items without an anchor, or whose anchor has no href, are skipped. An
# item containing a nested section (:toc_section selector) is titled by
# its anchor text; a leaf item is titled by the joined text of all its
# children. Titles have line breaks removed and whitespace collapsed.
def parse_toc_section(section)
  item_selector = @selectors[:toc_item]
  section_selector = @selectors[:toc_section]
  log.debug "-- Looking for TOC items with #{item_selector}"
  section.xpath(item_selector).inject([]) do |entries, item|
    # Get item's anchor and href
    anchor = item.name == 'a' ? item : item.at('a')
    href = anchor && anchor[:href]
    next entries unless href

    # Is this a leaf item or node ?
    nested = item.xpath(section_selector).first
    raw_title =
      if nested
        # Item has subsection, use anchor text for title
        anchor.inner_text
      else
        # Leaf item, glue inner_text from all children
        item.children.map { |child| child.inner_text }.join(' ')
      end
    title = raw_title.gsub(/[\r\n]/, '').gsub(/\s+/, ' ').strip
    log.debug "-- Found item: #{title}"

    # Parse sub-section
    children = nil
    if nested
      log.debug "-- Found section with #{section_selector}"
      log.debug "-- >"
      children = parse_toc_section(nested)
      log.debug '-- .'
    end
    entries << TocItem.new(title, href, children, @asset)
  end
end
|
148
|
+
end
|
149
|
+
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|