repub 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +11 -0
- data/README.rdoc +14 -8
- data/TODO +0 -2
- data/lib/repub.rb +1 -1
- data/lib/repub/app.rb +3 -0
- data/lib/repub/app/builder.rb +151 -154
- data/lib/repub/app/fetcher.rb +10 -23
- data/lib/repub/app/filter.rb +30 -0
- data/lib/repub/app/options.rb +0 -6
- data/lib/repub/app/parser.rb +63 -73
- data/lib/repub/app/post_filters.rb +135 -0
- data/lib/repub/app/pre_filters.rb +50 -0
- data/lib/repub/app/profile.rb +1 -1
- data/lib/repub/epub.rb +4 -3
- data/lib/repub/epub/container_item.rb +49 -0
- data/lib/repub/epub/{toc.rb → ncx.rb} +137 -139
- data/lib/repub/epub/ocf.rb +62 -0
- data/lib/repub/epub/opf.rb +136 -0
- data/repub.gemspec +4 -4
- data/test/epub/{test_toc.rb → test_ncx.rb} +14 -12
- data/test/epub/test_ocf.rb +28 -0
- data/test/epub/{test_content.rb → test_opf.rb} +25 -19
- data/test/test_filter.rb +28 -0
- data/test/test_parser.rb +3 -4
- metadata +17 -11
- data/lib/repub/epub/container.rb +0 -28
- data/lib/repub/epub/content.rb +0 -178
- data/test/epub/test_container.rb +0 -15
data/History.txt
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
== 0.3.4 / 2009-07-17
|
2
|
+
|
3
|
+
* Bug fixes
|
4
|
+
|
5
|
+
* Pre- and post processing filters moved to separate modules.
|
6
|
+
* Non-conformant element IDs are now fixed automatically
|
7
|
+
* Regardless of the source settings, doctype now is always set to XHTML 1.0 Transitional
|
8
|
+
* -F (disable fixups) option removed, fixups are always on
|
9
|
+
* Documentation updates
|
10
|
+
* More tests
|
11
|
+
|
1
12
|
== 0.3.3 / 2009-07-05
|
2
13
|
|
3
14
|
* New features
|
data/README.rdoc
CHANGED
@@ -27,9 +27,9 @@ broken too bad) be readable but will be lacking any metadata or TOC.
|
|
27
27
|
|
28
28
|
Few examples:
|
29
29
|
|
30
|
-
* Project Gutenberg's
|
30
|
+
* Project Gutenberg's The Adventures Of Sherlock Holmes (with proper table of contents)
|
31
31
|
|
32
|
-
repub -x 'title:div[@class=
|
32
|
+
repub -x 'title:div[@class="book"]//h1' \
|
33
33
|
-x 'toc://table' \
|
34
34
|
-x 'toc_item://tr' \
|
35
35
|
http://www.gutenberg.org/dirs/etext99/advsh12h.htm
|
@@ -38,7 +38,7 @@ This tells Repub to look for title in the first found H1 in the DIV of class "bo
|
|
38
38
|
located in the first TABLE and TOC item can be found inside TR.
|
39
39
|
The above will produce readable ePub which can be further enhanced by removing some "noise" content:
|
40
40
|
|
41
|
-
repub -x 'title:div[@class=
|
41
|
+
repub -x 'title:div[@class="book"]//h1' \
|
42
42
|
-x 'toc://table' \
|
43
43
|
-x 'toc_item://tr' \
|
44
44
|
-X '//pre' -X '//hr' -X '//body/h1' -X '//body/h2' \
|
@@ -69,6 +69,14 @@ For example, if you later decide to regenerate Git Manual ePub without TOC at th
|
|
69
69
|
|
70
70
|
Few more examples:
|
71
71
|
|
72
|
+
* Open Packaging Format (OPF) 2.0 (one of the ePub standards, in ePub)
|
73
|
+
|
74
|
+
repub -x 'title://p[@class="Title"]' \
|
75
|
+
-x 'toc://div[@class="TOC"]' \
|
76
|
+
-x 'toc_item:.//p' \
|
77
|
+
-x 'toc_section:.//div[@class="TOCSection"]' \
|
78
|
+
http://www.idpf.org/2007/opf/OPF_2.0_final_spec.html
|
79
|
+
|
72
80
|
* GNU Wget Manual
|
73
81
|
|
74
82
|
repub -m 'creator:gnu.org' \
|
@@ -76,7 +84,7 @@ Few more examples:
|
|
76
84
|
-X '//div[@class="contents"]' \
|
77
85
|
http://www.gnu.org/software/wget/manual/wget.html
|
78
86
|
|
79
|
-
*
|
87
|
+
* And finally, the "Hello World" of e-books, Alice's Adventures In Wonderland
|
80
88
|
|
81
89
|
repub -x 'title:body/h1' -x 'toc://table' -x 'toc_item://tr' -X '//pre' -X '//hr' -X '//body/h4' \
|
82
90
|
http://www.gutenberg.org/files/11/11-h/11-h.htm
|
@@ -108,8 +116,6 @@ Parser options:
|
|
108
116
|
-m, --meta NAME:VALUE Set publication information metadata NAME to VALUE.
|
109
117
|
Valid metadata names are: [creator date description
|
110
118
|
language publisher relation rights subject title]
|
111
|
-
-F, --no-fixup Do not attempt to make document meet XHTML 1.0 Strict.
|
112
|
-
Default is to try and fix things that are broken.
|
113
119
|
-e, --encoding NAME Set source document encoding. Default is to autodetect.
|
114
120
|
|
115
121
|
Post-processing options:
|
@@ -144,13 +150,13 @@ Currently, only "everything-on-one-page" HTML sources are supported. Repub will
|
|
144
150
|
|
145
151
|
Encoding auto-detection is slow.
|
146
152
|
|
147
|
-
Chardet 0.9.0 is broken under Ruby 1.9.
|
153
|
+
Chardet 0.9.0 is broken under Ruby 1.9 so if you want to use Ruby 1.9 you have to set encoding manually with -e.
|
148
154
|
|
149
155
|
Bugs: probably. If you find any, please report them to dg at invisiblellama dot net.
|
150
156
|
|
151
157
|
== INSTALL:
|
152
158
|
|
153
|
-
gem install repub
|
159
|
+
sudo gem install repub
|
154
160
|
|
155
161
|
== LICENSE:
|
156
162
|
|
data/TODO
CHANGED
data/lib/repub.rb
CHANGED
data/lib/repub/app.rb
CHANGED
@@ -5,6 +5,9 @@ require 'repub/app/utility'
|
|
5
5
|
require 'repub/app/logger'
|
6
6
|
require 'repub/app/options'
|
7
7
|
require 'repub/app/profile'
|
8
|
+
require 'repub/app/filter'
|
9
|
+
require 'repub/app/pre_filters'
|
10
|
+
require 'repub/app/post_filters'
|
8
11
|
require 'repub/app/fetcher'
|
9
12
|
require 'repub/app/parser'
|
10
13
|
require 'repub/app/builder'
|
data/lib/repub/app/builder.rb
CHANGED
@@ -13,7 +13,7 @@ module Repub
|
|
13
13
|
end
|
14
14
|
|
15
15
|
class Builder
|
16
|
-
include
|
16
|
+
include Logger
|
17
17
|
|
18
18
|
attr_reader :output_path
|
19
19
|
attr_reader :document_path
|
@@ -25,45 +25,50 @@ module Repub
|
|
25
25
|
def build(parser)
|
26
26
|
@parser = parser
|
27
27
|
|
28
|
-
# Initialize
|
29
|
-
@
|
28
|
+
# Initialize Container
|
29
|
+
@ocf = Epub::OCF.new
|
30
|
+
|
31
|
+
# Initialize Package
|
32
|
+
@opf = Epub::OPF.new(@parser.uid)
|
33
|
+
@ocf << @opf
|
30
34
|
# Default title is the parsed one
|
31
|
-
@
|
35
|
+
@opf.metadata.title = @parser.title
|
32
36
|
# Override metadata values specified in options
|
33
37
|
if @options[:metadata]
|
34
|
-
@
|
38
|
+
@opf.metadata.members.each do |m|
|
35
39
|
m = m.to_sym
|
36
|
-
|
40
|
+
# Do not allow to override uid
|
41
|
+
next if m == :identifier
|
37
42
|
if @options[:metadata][m]
|
38
|
-
@
|
39
|
-
log.debug "-- Setting metadata #{m} to \"#{@
|
43
|
+
@opf.metadata[m] = @options[:metadata][m]
|
44
|
+
log.debug "-- Setting metadata #{m} to \"#{@opf.metadata[m]}\""
|
40
45
|
end
|
41
46
|
end
|
42
47
|
end
|
43
48
|
|
44
|
-
# Initialize
|
45
|
-
@
|
46
|
-
|
47
|
-
@
|
49
|
+
# Initialize TOC
|
50
|
+
@ncx = Epub::NCX.new(@parser.uid)
|
51
|
+
@opf << @ncx
|
52
|
+
@ncx.title = @opf.metadata.title
|
53
|
+
@ncx.nav_map.points = @parser.toc
|
48
54
|
|
49
55
|
# Setup output filename and path
|
50
56
|
@output_path = File.expand_path(@options[:output_path].if_blank('.'))
|
51
57
|
if File.exist?(@output_path) && File.directory?(@output_path)
|
52
|
-
@output_path = File.join(@output_path, @
|
58
|
+
@output_path = File.join(@output_path, @opf.metadata.title.gsub(/\s/, '_'))
|
53
59
|
end
|
54
60
|
@output_path = @output_path + '.epub'
|
55
|
-
log.debug "--
|
61
|
+
log.debug "-- Output path is #{@output_path}"
|
56
62
|
|
57
63
|
# Build EPUB
|
58
64
|
tmpdir = Dir.mktmpdir(App::name)
|
59
65
|
begin
|
60
66
|
FileUtils.chdir(tmpdir) do
|
61
67
|
copy_and_process_assets
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
write_epub
|
68
|
+
@ncx.save
|
69
|
+
@opf.save
|
70
|
+
@ocf.save
|
71
|
+
@ocf.zip(@output_path)
|
67
72
|
end
|
68
73
|
ensure
|
69
74
|
# Keep tmp folder if we're going to open processed doc in browser
|
@@ -74,19 +79,17 @@ module Repub
|
|
74
79
|
|
75
80
|
private
|
76
81
|
|
77
|
-
MetaInf = 'META-INF'
|
78
|
-
|
79
82
|
def copy_and_process_assets
|
80
83
|
# Copy html
|
81
|
-
@parser.cache.assets[:documents].each do |
|
82
|
-
log.debug "-- Processing document #{
|
84
|
+
@parser.cache.assets[:documents].each do |file|
|
85
|
+
log.debug "-- Processing document #{file}"
|
83
86
|
# Copy asset from cache
|
84
|
-
FileUtils.cp(File.join(@parser.cache.path,
|
87
|
+
FileUtils.cp(File.join(@parser.cache.path, file), '.')
|
85
88
|
# Do post-processing
|
86
|
-
|
87
|
-
|
88
|
-
@
|
89
|
-
@document_path = File.expand_path(
|
89
|
+
apply_file_filters(file)
|
90
|
+
apply_document_filters(file)
|
91
|
+
@opf << file
|
92
|
+
@document_path = File.expand_path(file)
|
90
93
|
end
|
91
94
|
|
92
95
|
# Copy css
|
@@ -95,158 +98,152 @@ module Repub
|
|
95
98
|
@parser.cache.assets[:stylesheets].each do |css|
|
96
99
|
log.debug "-- Copying stylesheet #{css}"
|
97
100
|
FileUtils.cp(File.join(@parser.cache.path, css), '.')
|
98
|
-
@
|
101
|
+
@opf << css
|
99
102
|
end
|
100
103
|
elsif @options[:css] != '-'
|
101
104
|
# Copy custom css
|
102
105
|
log.debug "-- Using custom stylesheet #{@options[:css]}"
|
103
106
|
FileUtils.cp(@options[:css], '.')
|
104
|
-
@
|
107
|
+
@opf << File.basename(@options[:css])
|
105
108
|
end
|
106
109
|
|
107
110
|
# Copy images
|
108
111
|
@parser.cache.assets[:images].each do |image|
|
109
112
|
log.debug "-- Copying image #{image}"
|
110
113
|
FileUtils.cp(File.join(@parser.cache.path, image), '.')
|
111
|
-
@
|
114
|
+
@opf << image
|
112
115
|
end
|
113
116
|
|
114
117
|
# Copy external custom files (-a option)
|
115
118
|
@options[:add].each do |file|
|
116
119
|
log.debug "-- Copying external file #{file}"
|
117
120
|
FileUtils.cp(file, '.')
|
118
|
-
@
|
121
|
+
@opf << file
|
119
122
|
end if @options[:add]
|
120
123
|
end
|
121
|
-
|
122
|
-
def postprocess_file(asset)
|
123
|
-
source = IO.read(asset)
|
124
|
-
|
125
|
-
# Do rx substitutions
|
126
|
-
@options[:rx].each do |rx|
|
127
|
-
rx.strip!
|
128
|
-
delimiter = rx[0, 1]
|
129
|
-
rx = rx.gsub(/\\#{delimiter}/, "\n")
|
130
|
-
ra = rx.split(/#{delimiter}/).reject {|e| e.empty? }.each {|e| e.gsub!(/\n/, "#{delimiter}")}
|
131
|
-
raise ParserException, "Invalid regular expression" if ra.empty? || ra[0].nil? || ra.size > 2
|
132
|
-
pattern = ra[0]
|
133
|
-
replacement = ra[1] || ''
|
134
|
-
log.info "Replacing pattern /#{pattern.gsub(/#{delimiter}/, "\\#{delimiter}")}/ with \"#{replacement}\""
|
135
|
-
source.gsub!(Regexp.new(pattern), replacement)
|
136
|
-
end if @options[:rx]
|
137
|
-
|
138
|
-
# Add doctype if missing
|
139
|
-
if source !~ /\s*<!DOCTYPE/
|
140
|
-
log.debug "-- Adding missing doctype"
|
141
|
-
source = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" + source
|
142
|
-
end
|
143
|
-
|
144
|
-
# Save processed file
|
145
|
-
File.open(asset, 'w') do |f|
|
146
|
-
f.write(source)
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
def postprocess_doc(asset)
|
151
|
-
doc = Nokogiri::HTML.parse(IO.read(asset), nil, 'UTF-8')
|
152
|
-
|
153
|
-
# Set Content-Type charset to UTF-8
|
154
|
-
doc.xpath('//head/meta[@http-equiv="Content-Type"]').each do |el|
|
155
|
-
el['content'] = 'text/html; charset=utf-8'
|
156
|
-
end
|
157
|
-
|
158
|
-
# Process styles
|
159
|
-
if @options[:css] && !@options[:css].empty?
|
160
|
-
# Remove all stylesheet links
|
161
|
-
doc.xpath('//head/link[@rel="stylesheet"]').remove
|
162
|
-
if @options[:css] == '-'
|
163
|
-
# Also remove all inline styles
|
164
|
-
doc.xpath('//head/style').remove
|
165
|
-
log.info "Removing all stylesheet links and style elements"
|
166
|
-
else
|
167
|
-
# Add custom stylesheet link
|
168
|
-
link = Nokogiri::XML::Node.new('link', doc)
|
169
|
-
link['rel'] = 'stylesheet'
|
170
|
-
link['type'] = 'text/css'
|
171
|
-
link['href'] = File.basename(@options[:css])
|
172
|
-
# Add as the last child so it has precedence over (possible) inline styles before
|
173
|
-
doc.at('//head').add_child(link)
|
174
|
-
log.info "Replacing CSS refs with \"#{link['href']}\""
|
175
|
-
end
|
176
|
-
end
|
177
124
|
|
178
|
-
|
179
|
-
@options
|
180
|
-
|
181
|
-
fragment = e[selector]
|
182
|
-
element = doc.xpath(selector).first
|
183
|
-
if element
|
184
|
-
log.info "Inserting fragment \"#{fragment.to_html}\" after \"#{selector}\""
|
185
|
-
fragment.children.to_a.reverse.each {|node| element.add_next_sibling(node) }
|
186
|
-
end
|
187
|
-
end if @options[:after]
|
188
|
-
@options[:before].each do |e|
|
189
|
-
selector = e.keys.first
|
190
|
-
fragment = e[selector]
|
191
|
-
element = doc.xpath(selector).first
|
192
|
-
if element
|
193
|
-
log.info "Inserting fragment \"#{fragment}\" before \"#{selector}\""
|
194
|
-
fragment.children.to_a.each {|node| element.add_previous_sibling(node) }
|
195
|
-
end
|
196
|
-
end if @options[:before]
|
197
|
-
|
198
|
-
# Remove elements
|
199
|
-
@options[:remove].each do |selector|
|
200
|
-
log.info "Removing elements \"#{selector}\""
|
201
|
-
doc.search(selector).remove
|
202
|
-
end if @options[:remove]
|
203
|
-
|
204
|
-
# Save processed doc
|
205
|
-
File.open(asset, 'w') do |f|
|
206
|
-
if @options[:fixup] || true
|
207
|
-
# HACK: Nokogiri seems to ignore the fact that xmlns and other attrs aleady present
|
208
|
-
# in html node and adds them anyway. Just remove them here to avoid duplicates.
|
209
|
-
doc.root.attributes.each {|name, value| doc.root.remove_attribute(name) }
|
210
|
-
doc.write_xhtml_to(f, :encoding => 'UTF-8')
|
211
|
-
else
|
212
|
-
doc.write_html_to(f, :encoding => 'UTF-8')
|
213
|
-
end
|
214
|
-
end
|
125
|
+
def apply_file_filters(file)
|
126
|
+
s = PostFilters::FileFilters.apply_filters(IO.read(file), @options)
|
127
|
+
File.open(file, 'w') { |f| f.write(s) }
|
215
128
|
end
|
216
129
|
|
217
|
-
def
|
218
|
-
|
219
|
-
|
220
|
-
|
130
|
+
def apply_document_filters(file)
|
131
|
+
doc = Nokogiri::HTML.parse(IO.read(file), nil, 'UTF-8')
|
132
|
+
doc = PostFilters::DocumentFilters.apply_filters(doc, @options)
|
133
|
+
File.open(file, 'w') do |f|
|
134
|
+
# HACK: Nokogiri seems to ignore the fact that xmlns and other attrs are already present
|
135
|
+
# in html node and adds them anyway. Just remove them here to avoid duplicates.
|
136
|
+
doc.root.attributes.each {|name, value| doc.root.remove_attribute(name) }
|
137
|
+
doc.write_xhtml_to(f, :encoding => 'UTF-8')
|
221
138
|
end
|
222
139
|
end
|
223
140
|
|
224
|
-
def
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
141
|
+
# def postprocess_file(asset)
|
142
|
+
# source = IO.read(asset)
|
143
|
+
#
|
144
|
+
# # Do rx substitutions
|
145
|
+
# @options[:rx].each do |rx|
|
146
|
+
# rx.strip!
|
147
|
+
# delimiter = rx[0, 1]
|
148
|
+
# rx = rx.gsub(/\\#{delimiter}/, "\n")
|
149
|
+
# ra = rx.split(/#{delimiter}/).reject {|e| e.empty? }.each {|e| e.gsub!(/\n/, "#{delimiter}")}
|
150
|
+
# raise ParserException, "Invalid regular expression" if ra.empty? || ra[0].nil? || ra.size > 2
|
151
|
+
# pattern = ra[0]
|
152
|
+
# replacement = ra[1] || ''
|
153
|
+
# log.info "Replacing pattern /#{pattern.gsub(/#{delimiter}/, "\\#{delimiter}")}/ with \"#{replacement}\""
|
154
|
+
# source.gsub!(Regexp.new(pattern), replacement)
|
155
|
+
# end if @options[:rx]
|
156
|
+
#
|
157
|
+
# # Remove xml preamble if any
|
158
|
+
# preamble_rx = /^\s*<\?xml\s+[^>]+>\s*/mi
|
159
|
+
# if source =~ preamble_rx
|
160
|
+
# log.debug "-- Removing xml preamble"
|
161
|
+
# source.sub!(preamble_rx, '')
|
162
|
+
# end
|
163
|
+
#
|
164
|
+
# # Replace doctype
|
165
|
+
# doctype_rx = /^\s*<!DOCTYPE\s+[^>]+>\s*/mi
|
166
|
+
# if source =~ doctype_rx
|
167
|
+
# source.sub!(doctype_rx, '')
|
168
|
+
# end
|
169
|
+
# log.debug "-- Replacing doctype"
|
170
|
+
# source = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" + source
|
171
|
+
#
|
172
|
+
# # Save processed file
|
173
|
+
# File.open(asset, 'w') do |f|
|
174
|
+
# f.write(source)
|
175
|
+
# end
|
176
|
+
# end
|
238
177
|
|
239
|
-
def
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
178
|
+
# def postprocess_doc(asset)
|
179
|
+
# doc = Nokogiri::HTML.parse(IO.read(asset), nil, 'UTF-8')
|
180
|
+
#
|
181
|
+
# # Set Content-Type charset to UTF-8
|
182
|
+
# doc.xpath('//head/meta[@http-equiv="Content-Type"]').each do |el|
|
183
|
+
# el['content'] = 'text/html; charset=utf-8'
|
184
|
+
# end
|
185
|
+
#
|
186
|
+
# # Process styles
|
187
|
+
# if @options[:css] && !@options[:css].empty?
|
188
|
+
# # Remove all stylesheet links
|
189
|
+
# doc.xpath('//head/link[@rel="stylesheet"]').remove
|
190
|
+
# if @options[:css] == '-'
|
191
|
+
# # Also remove all inline styles
|
192
|
+
# doc.xpath('//head/style').remove
|
193
|
+
# log.info "Removing all stylesheet links and style elements"
|
194
|
+
# else
|
195
|
+
# # Add custom stylesheet link
|
196
|
+
# link = Nokogiri::XML::Node.new('link', doc)
|
197
|
+
# link['rel'] = 'stylesheet'
|
198
|
+
# link['type'] = 'text/css'
|
199
|
+
# link['href'] = File.basename(@options[:css])
|
200
|
+
# # Add as the last child so it has precedence over (possible) inline styles before
|
201
|
+
# doc.at('//head').add_child(link)
|
202
|
+
# log.info "Replacing CSS refs with \"#{link['href']}\""
|
203
|
+
# end
|
204
|
+
# end
|
205
|
+
#
|
206
|
+
# # Insert elements after/before selector
|
207
|
+
# @options[:after].each do |e|
|
208
|
+
# selector = e.keys.first
|
209
|
+
# fragment = e[selector]
|
210
|
+
# element = doc.xpath(selector).first
|
211
|
+
# if element
|
212
|
+
# log.info "Inserting fragment \"#{fragment.to_html}\" after \"#{selector}\""
|
213
|
+
# fragment.children.to_a.reverse.each {|node| element.add_next_sibling(node) }
|
214
|
+
# end
|
215
|
+
# end if @options[:after]
|
216
|
+
# @options[:before].each do |e|
|
217
|
+
# selector = e.keys.first
|
218
|
+
# fragment = e[selector]
|
219
|
+
# element = doc.xpath(selector).first
|
220
|
+
# if element
|
221
|
+
# log.info "Inserting fragment \"#{fragment}\" before \"#{selector}\""
|
222
|
+
# fragment.children.to_a.each {|node| element.add_previous_sibling(node) }
|
223
|
+
# end
|
224
|
+
# end if @options[:before]
|
225
|
+
#
|
226
|
+
# # Remove elements
|
227
|
+
# @options[:remove].each do |selector|
|
228
|
+
# log.info "Removing elements \"#{selector}\""
|
229
|
+
# doc.search(selector).remove
|
230
|
+
# end if @options[:remove]
|
231
|
+
#
|
232
|
+
# # XXX
|
233
|
+
# # doc.xpath('//body/a').each do |a|
|
234
|
+
# # wrapper = Nokogiri::XML::Node.new('p', doc)
|
235
|
+
# # a.add_next_sibling(wrapper)
|
236
|
+
# # wrapper << a
|
237
|
+
# # end
|
238
|
+
#
|
239
|
+
# # Save processed doc
|
240
|
+
# File.open(asset, 'w') do |f|
|
241
|
+
# # HACK: Nokogiri seems to ignore the fact that xmlns and other attrs aleady present
|
242
|
+
# # in html node and adds them anyway. Just remove them here to avoid duplicates.
|
243
|
+
# doc.root.attributes.each {|name, value| doc.root.remove_attribute(name) }
|
244
|
+
# doc.write_xhtml_to(f, :encoding => 'UTF-8')
|
245
|
+
# end
|
246
|
+
# end
|
250
247
|
end
|
251
248
|
|
252
249
|
end
|