invisiblellama-repub 0.3.3 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +11 -0
- data/README.rdoc +14 -8
- data/TODO +0 -2
- data/lib/repub.rb +1 -1
- data/lib/repub/app.rb +3 -0
- data/lib/repub/app/builder.rb +151 -154
- data/lib/repub/app/fetcher.rb +10 -23
- data/lib/repub/app/filter.rb +30 -0
- data/lib/repub/app/options.rb +0 -6
- data/lib/repub/app/parser.rb +63 -73
- data/lib/repub/app/post_filters.rb +135 -0
- data/lib/repub/app/pre_filters.rb +50 -0
- data/lib/repub/app/profile.rb +1 -1
- data/lib/repub/epub.rb +4 -3
- data/lib/repub/epub/container_item.rb +49 -0
- data/lib/repub/epub/{toc.rb → ncx.rb} +137 -139
- data/lib/repub/epub/ocf.rb +62 -0
- data/lib/repub/epub/opf.rb +136 -0
- data/repub.gemspec +4 -4
- data/test/epub/{test_toc.rb → test_ncx.rb} +14 -12
- data/test/epub/test_ocf.rb +28 -0
- data/test/epub/{test_content.rb → test_opf.rb} +25 -19
- data/test/test_filter.rb +28 -0
- data/test/test_parser.rb +3 -4
- metadata +17 -11
- data/lib/repub/epub/container.rb +0 -28
- data/lib/repub/epub/content.rb +0 -178
- data/test/epub/test_container.rb +0 -15
data/History.txt
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
== 0.3.4 / 2009-07-17
|
2
|
+
|
3
|
+
* Bug fixes
|
4
|
+
|
5
|
+
* Pre- and post processing filters moved to separate modules.
|
6
|
+
* Non-conformant element IDs are now fixed automatically
|
7
|
+
* Regardless of the source settings, doctype is now always set to XHTML 1.0 Transitional
|
8
|
+
* -F (disable fixups) option removed, fixups are always on
|
9
|
+
* Documentation updates
|
10
|
+
* More tests
|
11
|
+
|
1
12
|
== 0.3.3 / 2009-07-05
|
2
13
|
|
3
14
|
* New features
|
data/README.rdoc
CHANGED
@@ -27,9 +27,9 @@ broken too bad) be readable but will be lacking any metadata or TOC.
|
|
27
27
|
|
28
28
|
Few examples:
|
29
29
|
|
30
|
-
* Project Gutenberg's
|
30
|
+
* Project Gutenberg's The Adventures Of Sherlock Holmes (with proper table of contents)
|
31
31
|
|
32
|
-
repub -x 'title:div[@class=
|
32
|
+
repub -x 'title:div[@class="book"]//h1' \
|
33
33
|
-x 'toc://table' \
|
34
34
|
-x 'toc_item://tr' \
|
35
35
|
http://www.gutenberg.org/dirs/etext99/advsh12h.htm
|
@@ -38,7 +38,7 @@ This tells Repub to look for title in the first found H1 in the DIV of class "bo
|
|
38
38
|
located in the first TABLE and TOC item can be found inside TR.
|
39
39
|
The above will produce readable ePub which can be further enhanced by removing some "noise" content:
|
40
40
|
|
41
|
-
repub -x 'title:div[@class=
|
41
|
+
repub -x 'title:div[@class="book"]//h1' \
|
42
42
|
-x 'toc://table' \
|
43
43
|
-x 'toc_item://tr' \
|
44
44
|
-X '//pre' -X '//hr' -X '//body/h1' -X '//body/h2' \
|
@@ -69,6 +69,14 @@ For example, if you later decide to regenerate Git Manual ePub without TOC at th
|
|
69
69
|
|
70
70
|
Few more examples:
|
71
71
|
|
72
|
+
* Open Packaging Format (OPF) 2.0 (one of the ePub standards, in ePub)
|
73
|
+
|
74
|
+
repub -x 'title://p[@class="Title"]' \
|
75
|
+
-x 'toc://div[@class="TOC"]' \
|
76
|
+
-x 'toc_item:.//p' \
|
77
|
+
-x 'toc_section:.//div[@class="TOCSection"]' \
|
78
|
+
http://www.idpf.org/2007/opf/OPF_2.0_final_spec.html
|
79
|
+
|
72
80
|
* GNU Wget Manual
|
73
81
|
|
74
82
|
repub -m 'creator:gnu.org' \
|
@@ -76,7 +84,7 @@ Few more examples:
|
|
76
84
|
-X '//div[@class="contents"]' \
|
77
85
|
http://www.gnu.org/software/wget/manual/wget.html
|
78
86
|
|
79
|
-
*
|
87
|
+
* And finally, the "Hello World" of e-books, Alice's Adventures In Wonderland
|
80
88
|
|
81
89
|
repub -x 'title:body/h1' -x 'toc://table' -x 'toc_item://tr' -X '//pre' -X '//hr' -X '//body/h4' \
|
82
90
|
http://www.gutenberg.org/files/11/11-h/11-h.htm
|
@@ -108,8 +116,6 @@ Parser options:
|
|
108
116
|
-m, --meta NAME:VALUE Set publication information metadata NAME to VALUE.
|
109
117
|
Valid metadata names are: [creator date description
|
110
118
|
language publisher relation rights subject title]
|
111
|
-
-F, --no-fixup Do not attempt to make document meet XHTML 1.0 Strict.
|
112
|
-
Default is to try and fix things that are broken.
|
113
119
|
-e, --encoding NAME Set source document encoding. Default is to autodetect.
|
114
120
|
|
115
121
|
Post-processing options:
|
@@ -144,13 +150,13 @@ Currently, only "everything-on-one-page" HTML sources are supported. Repub will
|
|
144
150
|
|
145
151
|
Encoding auto-detection is slow.
|
146
152
|
|
147
|
-
Chardet 0.9.0 is broken under Ruby 1.9.
|
153
|
+
Chardet 0.9.0 is broken under Ruby 1.9, so if you want to use Ruby 1.9 you have to set the encoding manually with -e.
|
148
154
|
|
149
155
|
Bugs: probably. If you find any, please report them to dg at invisiblellama dot net.
|
150
156
|
|
151
157
|
== INSTALL:
|
152
158
|
|
153
|
-
gem install repub
|
159
|
+
sudo gem install repub
|
154
160
|
|
155
161
|
== LICENSE:
|
156
162
|
|
data/TODO
CHANGED
data/lib/repub.rb
CHANGED
data/lib/repub/app.rb
CHANGED
@@ -5,6 +5,9 @@ require 'repub/app/utility'
|
|
5
5
|
require 'repub/app/logger'
|
6
6
|
require 'repub/app/options'
|
7
7
|
require 'repub/app/profile'
|
8
|
+
require 'repub/app/filter'
|
9
|
+
require 'repub/app/pre_filters'
|
10
|
+
require 'repub/app/post_filters'
|
8
11
|
require 'repub/app/fetcher'
|
9
12
|
require 'repub/app/parser'
|
10
13
|
require 'repub/app/builder'
|
data/lib/repub/app/builder.rb
CHANGED
@@ -13,7 +13,7 @@ module Repub
|
|
13
13
|
end
|
14
14
|
|
15
15
|
class Builder
|
16
|
-
include
|
16
|
+
include Logger
|
17
17
|
|
18
18
|
attr_reader :output_path
|
19
19
|
attr_reader :document_path
|
@@ -25,45 +25,50 @@ module Repub
|
|
25
25
|
def build(parser)
|
26
26
|
@parser = parser
|
27
27
|
|
28
|
-
# Initialize
|
29
|
-
@
|
28
|
+
# Initialize Container
|
29
|
+
@ocf = Epub::OCF.new
|
30
|
+
|
31
|
+
# Initialize Package
|
32
|
+
@opf = Epub::OPF.new(@parser.uid)
|
33
|
+
@ocf << @opf
|
30
34
|
# Default title is the parsed one
|
31
|
-
@
|
35
|
+
@opf.metadata.title = @parser.title
|
32
36
|
# Override metadata values specified in options
|
33
37
|
if @options[:metadata]
|
34
|
-
@
|
38
|
+
@opf.metadata.members.each do |m|
|
35
39
|
m = m.to_sym
|
36
|
-
|
40
|
+
# Do not allow to override uid
|
41
|
+
next if m == :identifier
|
37
42
|
if @options[:metadata][m]
|
38
|
-
@
|
39
|
-
log.debug "-- Setting metadata #{m} to \"#{@
|
43
|
+
@opf.metadata[m] = @options[:metadata][m]
|
44
|
+
log.debug "-- Setting metadata #{m} to \"#{@opf.metadata[m]}\""
|
40
45
|
end
|
41
46
|
end
|
42
47
|
end
|
43
48
|
|
44
|
-
# Initialize
|
45
|
-
@
|
46
|
-
|
47
|
-
@
|
49
|
+
# Initialize TOC
|
50
|
+
@ncx = Epub::NCX.new(@parser.uid)
|
51
|
+
@opf << @ncx
|
52
|
+
@ncx.title = @opf.metadata.title
|
53
|
+
@ncx.nav_map.points = @parser.toc
|
48
54
|
|
49
55
|
# Setup output filename and path
|
50
56
|
@output_path = File.expand_path(@options[:output_path].if_blank('.'))
|
51
57
|
if File.exist?(@output_path) && File.directory?(@output_path)
|
52
|
-
@output_path = File.join(@output_path, @
|
58
|
+
@output_path = File.join(@output_path, @opf.metadata.title.gsub(/\s/, '_'))
|
53
59
|
end
|
54
60
|
@output_path = @output_path + '.epub'
|
55
|
-
log.debug "--
|
61
|
+
log.debug "-- Output path is #{@output_path}"
|
56
62
|
|
57
63
|
# Build EPUB
|
58
64
|
tmpdir = Dir.mktmpdir(App::name)
|
59
65
|
begin
|
60
66
|
FileUtils.chdir(tmpdir) do
|
61
67
|
copy_and_process_assets
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
write_epub
|
68
|
+
@ncx.save
|
69
|
+
@opf.save
|
70
|
+
@ocf.save
|
71
|
+
@ocf.zip(@output_path)
|
67
72
|
end
|
68
73
|
ensure
|
69
74
|
# Keep tmp folder if we're going to open processed doc in browser
|
@@ -74,19 +79,17 @@ module Repub
|
|
74
79
|
|
75
80
|
private
|
76
81
|
|
77
|
-
MetaInf = 'META-INF'
|
78
|
-
|
79
82
|
def copy_and_process_assets
|
80
83
|
# Copy html
|
81
|
-
@parser.cache.assets[:documents].each do |
|
82
|
-
log.debug "-- Processing document #{
|
84
|
+
@parser.cache.assets[:documents].each do |file|
|
85
|
+
log.debug "-- Processing document #{file}"
|
83
86
|
# Copy asset from cache
|
84
|
-
FileUtils.cp(File.join(@parser.cache.path,
|
87
|
+
FileUtils.cp(File.join(@parser.cache.path, file), '.')
|
85
88
|
# Do post-processing
|
86
|
-
|
87
|
-
|
88
|
-
@
|
89
|
-
@document_path = File.expand_path(
|
89
|
+
apply_file_filters(file)
|
90
|
+
apply_document_filters(file)
|
91
|
+
@opf << file
|
92
|
+
@document_path = File.expand_path(file)
|
90
93
|
end
|
91
94
|
|
92
95
|
# Copy css
|
@@ -95,158 +98,152 @@ module Repub
|
|
95
98
|
@parser.cache.assets[:stylesheets].each do |css|
|
96
99
|
log.debug "-- Copying stylesheet #{css}"
|
97
100
|
FileUtils.cp(File.join(@parser.cache.path, css), '.')
|
98
|
-
@
|
101
|
+
@opf << css
|
99
102
|
end
|
100
103
|
elsif @options[:css] != '-'
|
101
104
|
# Copy custom css
|
102
105
|
log.debug "-- Using custom stylesheet #{@options[:css]}"
|
103
106
|
FileUtils.cp(@options[:css], '.')
|
104
|
-
@
|
107
|
+
@opf << File.basename(@options[:css])
|
105
108
|
end
|
106
109
|
|
107
110
|
# Copy images
|
108
111
|
@parser.cache.assets[:images].each do |image|
|
109
112
|
log.debug "-- Copying image #{image}"
|
110
113
|
FileUtils.cp(File.join(@parser.cache.path, image), '.')
|
111
|
-
@
|
114
|
+
@opf << image
|
112
115
|
end
|
113
116
|
|
114
117
|
# Copy external custom files (-a option)
|
115
118
|
@options[:add].each do |file|
|
116
119
|
log.debug "-- Copying external file #{file}"
|
117
120
|
FileUtils.cp(file, '.')
|
118
|
-
@
|
121
|
+
@opf << file
|
119
122
|
end if @options[:add]
|
120
123
|
end
|
121
|
-
|
122
|
-
def postprocess_file(asset)
|
123
|
-
source = IO.read(asset)
|
124
|
-
|
125
|
-
# Do rx substitutions
|
126
|
-
@options[:rx].each do |rx|
|
127
|
-
rx.strip!
|
128
|
-
delimiter = rx[0, 1]
|
129
|
-
rx = rx.gsub(/\\#{delimiter}/, "\n")
|
130
|
-
ra = rx.split(/#{delimiter}/).reject {|e| e.empty? }.each {|e| e.gsub!(/\n/, "#{delimiter}")}
|
131
|
-
raise ParserException, "Invalid regular expression" if ra.empty? || ra[0].nil? || ra.size > 2
|
132
|
-
pattern = ra[0]
|
133
|
-
replacement = ra[1] || ''
|
134
|
-
log.info "Replacing pattern /#{pattern.gsub(/#{delimiter}/, "\\#{delimiter}")}/ with \"#{replacement}\""
|
135
|
-
source.gsub!(Regexp.new(pattern), replacement)
|
136
|
-
end if @options[:rx]
|
137
|
-
|
138
|
-
# Add doctype if missing
|
139
|
-
if source !~ /\s*<!DOCTYPE/
|
140
|
-
log.debug "-- Adding missing doctype"
|
141
|
-
source = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" + source
|
142
|
-
end
|
143
|
-
|
144
|
-
# Save processed file
|
145
|
-
File.open(asset, 'w') do |f|
|
146
|
-
f.write(source)
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
def postprocess_doc(asset)
|
151
|
-
doc = Nokogiri::HTML.parse(IO.read(asset), nil, 'UTF-8')
|
152
|
-
|
153
|
-
# Set Content-Type charset to UTF-8
|
154
|
-
doc.xpath('//head/meta[@http-equiv="Content-Type"]').each do |el|
|
155
|
-
el['content'] = 'text/html; charset=utf-8'
|
156
|
-
end
|
157
|
-
|
158
|
-
# Process styles
|
159
|
-
if @options[:css] && !@options[:css].empty?
|
160
|
-
# Remove all stylesheet links
|
161
|
-
doc.xpath('//head/link[@rel="stylesheet"]').remove
|
162
|
-
if @options[:css] == '-'
|
163
|
-
# Also remove all inline styles
|
164
|
-
doc.xpath('//head/style').remove
|
165
|
-
log.info "Removing all stylesheet links and style elements"
|
166
|
-
else
|
167
|
-
# Add custom stylesheet link
|
168
|
-
link = Nokogiri::XML::Node.new('link', doc)
|
169
|
-
link['rel'] = 'stylesheet'
|
170
|
-
link['type'] = 'text/css'
|
171
|
-
link['href'] = File.basename(@options[:css])
|
172
|
-
# Add as the last child so it has precedence over (possible) inline styles before
|
173
|
-
doc.at('//head').add_child(link)
|
174
|
-
log.info "Replacing CSS refs with \"#{link['href']}\""
|
175
|
-
end
|
176
|
-
end
|
177
124
|
|
178
|
-
|
179
|
-
@options
|
180
|
-
|
181
|
-
fragment = e[selector]
|
182
|
-
element = doc.xpath(selector).first
|
183
|
-
if element
|
184
|
-
log.info "Inserting fragment \"#{fragment.to_html}\" after \"#{selector}\""
|
185
|
-
fragment.children.to_a.reverse.each {|node| element.add_next_sibling(node) }
|
186
|
-
end
|
187
|
-
end if @options[:after]
|
188
|
-
@options[:before].each do |e|
|
189
|
-
selector = e.keys.first
|
190
|
-
fragment = e[selector]
|
191
|
-
element = doc.xpath(selector).first
|
192
|
-
if element
|
193
|
-
log.info "Inserting fragment \"#{fragment}\" before \"#{selector}\""
|
194
|
-
fragment.children.to_a.each {|node| element.add_previous_sibling(node) }
|
195
|
-
end
|
196
|
-
end if @options[:before]
|
197
|
-
|
198
|
-
# Remove elements
|
199
|
-
@options[:remove].each do |selector|
|
200
|
-
log.info "Removing elements \"#{selector}\""
|
201
|
-
doc.search(selector).remove
|
202
|
-
end if @options[:remove]
|
203
|
-
|
204
|
-
# Save processed doc
|
205
|
-
File.open(asset, 'w') do |f|
|
206
|
-
if @options[:fixup] || true
|
207
|
-
# HACK: Nokogiri seems to ignore the fact that xmlns and other attrs aleady present
|
208
|
-
# in html node and adds them anyway. Just remove them here to avoid duplicates.
|
209
|
-
doc.root.attributes.each {|name, value| doc.root.remove_attribute(name) }
|
210
|
-
doc.write_xhtml_to(f, :encoding => 'UTF-8')
|
211
|
-
else
|
212
|
-
doc.write_html_to(f, :encoding => 'UTF-8')
|
213
|
-
end
|
214
|
-
end
|
125
|
+
def apply_file_filters(file)
|
126
|
+
s = PostFilters::FileFilters.apply_filters(IO.read(file), @options)
|
127
|
+
File.open(file, 'w') { |f| f.write(s) }
|
215
128
|
end
|
216
129
|
|
217
|
-
def
|
218
|
-
|
219
|
-
|
220
|
-
|
130
|
+
def apply_document_filters(file)
|
131
|
+
doc = Nokogiri::HTML.parse(IO.read(file), nil, 'UTF-8')
|
132
|
+
doc = PostFilters::DocumentFilters.apply_filters(doc, @options)
|
133
|
+
File.open(file, 'w') do |f|
|
134
|
+
# HACK: Nokogiri seems to ignore the fact that xmlns and other attrs are already present
|
135
|
+
# in html node and adds them anyway. Just remove them here to avoid duplicates.
|
136
|
+
doc.root.attributes.each {|name, value| doc.root.remove_attribute(name) }
|
137
|
+
doc.write_xhtml_to(f, :encoding => 'UTF-8')
|
221
138
|
end
|
222
139
|
end
|
223
140
|
|
224
|
-
def
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
141
|
+
# def postprocess_file(asset)
|
142
|
+
# source = IO.read(asset)
|
143
|
+
#
|
144
|
+
# # Do rx substitutions
|
145
|
+
# @options[:rx].each do |rx|
|
146
|
+
# rx.strip!
|
147
|
+
# delimiter = rx[0, 1]
|
148
|
+
# rx = rx.gsub(/\\#{delimiter}/, "\n")
|
149
|
+
# ra = rx.split(/#{delimiter}/).reject {|e| e.empty? }.each {|e| e.gsub!(/\n/, "#{delimiter}")}
|
150
|
+
# raise ParserException, "Invalid regular expression" if ra.empty? || ra[0].nil? || ra.size > 2
|
151
|
+
# pattern = ra[0]
|
152
|
+
# replacement = ra[1] || ''
|
153
|
+
# log.info "Replacing pattern /#{pattern.gsub(/#{delimiter}/, "\\#{delimiter}")}/ with \"#{replacement}\""
|
154
|
+
# source.gsub!(Regexp.new(pattern), replacement)
|
155
|
+
# end if @options[:rx]
|
156
|
+
#
|
157
|
+
# # Remove xml preamble if any
|
158
|
+
# preamble_rx = /^\s*<\?xml\s+[^>]+>\s*/mi
|
159
|
+
# if source =~ preamble_rx
|
160
|
+
# log.debug "-- Removing xml preamble"
|
161
|
+
# source.sub!(preamble_rx, '')
|
162
|
+
# end
|
163
|
+
#
|
164
|
+
# # Replace doctype
|
165
|
+
# doctype_rx = /^\s*<!DOCTYPE\s+[^>]+>\s*/mi
|
166
|
+
# if source =~ doctype_rx
|
167
|
+
# source.sub!(doctype_rx, '')
|
168
|
+
# end
|
169
|
+
# log.debug "-- Replacing doctype"
|
170
|
+
# source = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" + source
|
171
|
+
#
|
172
|
+
# # Save processed file
|
173
|
+
# File.open(asset, 'w') do |f|
|
174
|
+
# f.write(source)
|
175
|
+
# end
|
176
|
+
# end
|
238
177
|
|
239
|
-
def
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
178
|
+
# def postprocess_doc(asset)
|
179
|
+
# doc = Nokogiri::HTML.parse(IO.read(asset), nil, 'UTF-8')
|
180
|
+
#
|
181
|
+
# # Set Content-Type charset to UTF-8
|
182
|
+
# doc.xpath('//head/meta[@http-equiv="Content-Type"]').each do |el|
|
183
|
+
# el['content'] = 'text/html; charset=utf-8'
|
184
|
+
# end
|
185
|
+
#
|
186
|
+
# # Process styles
|
187
|
+
# if @options[:css] && !@options[:css].empty?
|
188
|
+
# # Remove all stylesheet links
|
189
|
+
# doc.xpath('//head/link[@rel="stylesheet"]').remove
|
190
|
+
# if @options[:css] == '-'
|
191
|
+
# # Also remove all inline styles
|
192
|
+
# doc.xpath('//head/style').remove
|
193
|
+
# log.info "Removing all stylesheet links and style elements"
|
194
|
+
# else
|
195
|
+
# # Add custom stylesheet link
|
196
|
+
# link = Nokogiri::XML::Node.new('link', doc)
|
197
|
+
# link['rel'] = 'stylesheet'
|
198
|
+
# link['type'] = 'text/css'
|
199
|
+
# link['href'] = File.basename(@options[:css])
|
200
|
+
# # Add as the last child so it has precedence over (possible) inline styles before
|
201
|
+
# doc.at('//head').add_child(link)
|
202
|
+
# log.info "Replacing CSS refs with \"#{link['href']}\""
|
203
|
+
# end
|
204
|
+
# end
|
205
|
+
#
|
206
|
+
# # Insert elements after/before selector
|
207
|
+
# @options[:after].each do |e|
|
208
|
+
# selector = e.keys.first
|
209
|
+
# fragment = e[selector]
|
210
|
+
# element = doc.xpath(selector).first
|
211
|
+
# if element
|
212
|
+
# log.info "Inserting fragment \"#{fragment.to_html}\" after \"#{selector}\""
|
213
|
+
# fragment.children.to_a.reverse.each {|node| element.add_next_sibling(node) }
|
214
|
+
# end
|
215
|
+
# end if @options[:after]
|
216
|
+
# @options[:before].each do |e|
|
217
|
+
# selector = e.keys.first
|
218
|
+
# fragment = e[selector]
|
219
|
+
# element = doc.xpath(selector).first
|
220
|
+
# if element
|
221
|
+
# log.info "Inserting fragment \"#{fragment}\" before \"#{selector}\""
|
222
|
+
# fragment.children.to_a.each {|node| element.add_previous_sibling(node) }
|
223
|
+
# end
|
224
|
+
# end if @options[:before]
|
225
|
+
#
|
226
|
+
# # Remove elements
|
227
|
+
# @options[:remove].each do |selector|
|
228
|
+
# log.info "Removing elements \"#{selector}\""
|
229
|
+
# doc.search(selector).remove
|
230
|
+
# end if @options[:remove]
|
231
|
+
#
|
232
|
+
# # XXX
|
233
|
+
# # doc.xpath('//body/a').each do |a|
|
234
|
+
# # wrapper = Nokogiri::XML::Node.new('p', doc)
|
235
|
+
# # a.add_next_sibling(wrapper)
|
236
|
+
# # wrapper << a
|
237
|
+
# # end
|
238
|
+
#
|
239
|
+
# # Save processed doc
|
240
|
+
# File.open(asset, 'w') do |f|
|
241
|
+
# # HACK: Nokogiri seems to ignore the fact that xmlns and other attrs aleady present
|
242
|
+
# # in html node and adds them anyway. Just remove them here to avoid duplicates.
|
243
|
+
# doc.root.attributes.each {|name, value| doc.root.remove_attribute(name) }
|
244
|
+
# doc.write_xhtml_to(f, :encoding => 'UTF-8')
|
245
|
+
# end
|
246
|
+
# end
|
250
247
|
end
|
251
248
|
|
252
249
|
end
|