news2kindle 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +3 -0
- data/.tachikoma.yml +1 -0
- data/.travis.yml +18 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +119 -0
- data/README.md +59 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/bin/test-generator +21 -0
- data/exe/news2kindle +107 -0
- data/lib/news2kindle.rb +12 -0
- data/lib/news2kindle/dup_checker.rb +41 -0
- data/lib/news2kindle/generator/internet-watch.rb +236 -0
- data/lib/news2kindle/generator/nikkei-free.rb +18 -0
- data/lib/news2kindle/generator/nikkei-paid.rb +352 -0
- data/lib/news2kindle/generator/tdiary.rb +135 -0
- data/lib/news2kindle/generator/wsj-paid.rb +360 -0
- data/lib/news2kindle/generator/wsjus-paid.rb +90 -0
- data/lib/news2kindle/task.rb +116 -0
- data/lib/news2kindle/version.rb +3 -0
- data/news2kindle.gemspec +37 -0
- data/news2kindle.yaml.sample +31 -0
- data/resource/internet-watch.css +27 -0
- data/resource/internet-watch.jpg +0 -0
- data/resource/nikkei.css +43 -0
- data/resource/nikkei.jpg +0 -0
- data/resource/tdiary.css +27 -0
- data/resource/wsj-us.jpg +0 -0
- data/resource/wsj.css +19 -0
- data/resource/wsj.jpg +0 -0
- metadata +245 -0
@@ -0,0 +1,135 @@
|
|
1
|
+
# scraping tDiary's N-Year diary for News2Kindle
|
2
|
+
#
|
3
|
+
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'open-uri'
|
6
|
+
require 'uri'
|
7
|
+
|
8
|
+
module News2Kindle
|
9
|
+
module Generator
|
10
|
+
class Tdiary
|
11
|
+
# Sets up the output directory for a tDiary build.
#
# tmpdir:: directory that receives the generated files. The stylesheet
#          is copied into it immediately; note that the source path is
#          relative to the process working directory.
def initialize( tmpdir )
  FileUtils.cp( "./resource/tdiary.css", tmpdir )
  @current_dir = tmpdir
end
|
15
|
+
|
16
|
+
# Builds the Kindle source set (index.html, toc.ncx, tdiary.opf) from the
# tDiary page addressed by "<top>?date=MMDD".
#
# opts:: Hash read for
#        :now        - object responding to #strftime (date query, dc:Date)
#        :tdiary_top - top URL of the diary; ENV['TDIARY_TOP'] is the fallback
#
# Yields the path of the generated OPF file.
# Raises ArgumentError when no top URL is configured, and re-raises the
# last fetch/parse error when the page cannot be loaded in five attempts.
def generate(opts)
  now = opts[:now]
  @top = opts[:tdiary_top] || ENV['TDIARY_TOP']
  raise ArgumentError, 'tDiary top URL is not given (opts[:tdiary_top] or TDIARY_TOP)' unless @top

  html = title = author = nil
  begin
    retry_loop( 5 ) do
      # URI.open rather than Kernel#open: the latter no longer opens URLs on Ruby 3.
      html = Nokogiri(URI.open("#{@top}?date=#{now.strftime '%m%d'}", 'r:utf-8', &:read))
      title = (html / 'head title').text
      author_meta = (html / 'head meta[name="author"]')[0]
      author = author_meta ? author_meta['content'].to_s : ''
    end
  rescue => e
    News2Kindle.logger.info "failed by retry over: #{e.class}: #{e}"
    # Without a page there is nothing to generate; continuing would only
    # fail later on a nil document.
    raise
  end

  #
  # generating html
  #
  html.css('head meta', 'head link', 'head style', 'script').remove
  html.css('div.adminmenu', 'div.sidebar', 'div.footer').remove
  (html / 'img').each do |img|
    next unless img['src'] # an <img> without src has nothing to download
    img['src'] = save_image(img['src'])
  end
  File.open( "#{@current_dir}/index.html", 'w' ) {|f| f.write html.to_html}

  # Text taken from the page is plain text; escape it before embedding in XML.
  xml_title = title.encode( xml: :text )
  xml_author = author.encode( xml: :text )

  #
  # generating TOC in ncx
  #
  ncx_lines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">',
    '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">',
    "<docTitle><text>#{xml_title}</text></docTitle>",
    '<navMap>',
    '<navPoint id="index" playOrder="1">',
    '<navLabel>',
    "<text>#{xml_title}</text>",
    '</navLabel>',
    '<content src="index.html" />',
    '</navPoint>',
    '</navMap>',
    '</ncx>',
  ]
  File.open( "#{@current_dir}/toc.ncx", 'w:utf-8' ) do |f|
    f.write( ncx_lines.join( "\n" ) + "\n" )
  end

  #
  # generating OPF
  #
  opf_lines = [
    '<?xml version="1.0" encoding="utf-8"?>',
    '<package unique-identifier="uid">',
    '<metadata>',
    '<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core" xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/">',
    "<dc:Title>#{xml_title}</dc:Title>",
    '<dc:Language>ja-JP</dc:Language>',
    "<dc:Creator>#{xml_author}</dc:Creator>",
    '<dc:Description>tDiary N-Year Diary</dc:Description>',
    "<dc:Date>#{now.strftime( '%d/%m/%Y' )}</dc:Date>",
    '</dc-metadata>',
    '</metadata>',
    '<manifest>',
    '<item id="toc" media-type="application/x-dtbncx+xml" href="toc.ncx"></item>',
    '<item id="style" media-type="text/css" href="tdiary.css"></item>',
    '<item id="index" media-type="text/html" href="index.html"></item>',
    '</manifest>',
    '<spine toc="toc">',
    '<itemref idref="index" />',
    '</spine>',
    '<tours></tours>',
    '<guide>',
    '<reference type="start" title="Start Page" href="index.html"></reference>',
    '</guide>',
    '</package>',
  ]
  File.open( "#{@current_dir}/tdiary.opf", 'w:utf-8' ) do |f|
    f.write( opf_lines.join( "\n" ) + "\n" )
  end

  yield "#{@current_dir}/tdiary.opf"
end
|
98
|
+
|
99
|
+
private
|
100
|
+
|
101
|
+
# Runs the given block and runs it again whenever it raises a
# StandardError, pausing one second between attempts.
#
# times:: number of attempts allowed; the error of the last attempt is
#         re-raised to the caller.
# Returns the value of the block.
def retry_loop( times )
  failures = 0
  begin
    yield
  rescue
    failures += 1
    raise unless failures < times
    News2Kindle.logger.debug $!
    News2Kindle.logger.info "#{failures} retry."
    sleep 1
    retry
  end
end
|
117
|
+
|
118
|
+
# Downloads one image into the working directory under a random name.
#
# img:: value of an <img src>; absolute URLs are used as they are, anything
#       else is resolved against the diary top URL (@top).
#
# Returns the local file name (random hex plus the extension of the URL
# path). The name is returned even when the download fails; the failure is
# only logged as a warning, so a broken image never aborts the build.
def save_image(img)
  require 'securerandom'

  # URI.join resolves relative and root-relative paths; plain string
  # concatenation only worked for paths directly below @top.
  uri = URI.join( @top, img )
  # Take the extension from the path only, so query strings and dots in
  # the host name cannot leak into the local file name.
  file_name = "#{SecureRandom.hex}#{File.extname( uri.path.to_s )}"
  begin
    # Download before creating the file so a failure leaves nothing behind,
    # and write in binary mode because the payload is image data.
    data = uri.read
    File.binwrite( "#{@current_dir}/#{file_name}", data )
  rescue StandardError => e
    News2Kindle.logger.warn "#{e}: #{uri}"
  end
  file_name
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
@@ -0,0 +1,360 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# scraping jp.wsj.com for News2Kindle
|
4
|
+
#
|
5
|
+
|
6
|
+
require 'mechanize'
|
7
|
+
require 'nokogiri'
|
8
|
+
require 'open-uri'
|
9
|
+
require 'tmpdir'
|
10
|
+
require 'pathname'
|
11
|
+
require 'json'
|
12
|
+
|
13
|
+
module News2Kindle
|
14
|
+
module Generator
|
15
|
+
class WsjPaid
|
16
|
+
TOP = 'http://jp.wsj.com'
|
17
|
+
LOGIN = "https://id.wsj.com/access/pages/wsj/jp/login_standalone.html"
|
18
|
+
|
19
|
+
# Loads the WSJ account and prepares the working directories.
#
# The account is read through the pit library (entry 'wsj', keys 'user'
# and 'pass'); when pit cannot be loaded, ENV['WSJ_ID'] and ENV['WSJ_PW']
# are used instead.
#
# tmpdir:: base directory; 'src' and 'dst' are created below it when
#          missing, and the cover image and stylesheet are copied into
#          'dst' from ./resource (relative to the working directory).
def initialize( tmpdir )
  begin
    require 'pit'
    account = Pit::get( 'wsj', :require => {
      'user' => 'your ID of WSJ.',
      'pass' => 'your Password of WSJ.',
    } )
    @wsj_id, @wsj_pw = account['user'], account['pass']
  rescue LoadError # no pit library, using environment variables
    @wsj_id, @wsj_pw = ENV['WSJ_ID'], ENV['WSJ_PW']
  end

  @current_dir = tmpdir

  @src_dir = @current_dir + '/src'
  Dir.mkdir( @src_dir ) unless File.exist?( @src_dir )

  @dst_dir = @current_dir + '/dst'
  Dir.mkdir( @dst_dir ) unless File.exist?( @dst_dir )

  %w[jpg css].each do |ext|
    FileUtils.cp( "./resource/wsj.#{ext}", @dst_dir )
  end
end
|
43
|
+
|
44
|
+
# Logs in to WSJ, collects the article links of the top page and of every
# category in the main navigation, and generates the e-book sources.
#
# opts:: Hash; opts[:now] must respond to #strftime and is used for the
#        timestamps embedded in the generated files.
#
# Yields the path of the generated OPF file.
# Re-raises the last error when a category page cannot be fetched in five
# attempts.
def generate(opts)
  @now = opts[:now]
  @now_str = @now.strftime '%Y-%m-%d %H:%M'
  @title = "WSJ日本版"
  @lang = "ja-JP"

  agent = Mechanize::new
  agent.set_proxy( *ENV['HTTP_PROXY'].split( /:/ ) ) if ENV['HTTP_PROXY']

  #
  # login: the form of the login page is posted to the JSON endpoint, whose
  # response carries the URL to visit next
  #
  agent.get(LOGIN)
  form = agent.page.forms.first
  form.action = 'https://id.wsj.com/auth/submitlogin.json'
  form['username'] = @wsj_id
  form['password'] = @wsj_pw
  form.submit

  response = JSON.parse(agent.page.body)
  agent.get( response["url"] )

  agent.get( TOP + "/home-page?_wsjregion=asia,jp&_homepage=/home/jp")

  toc = []

  #
  # scraping top news
  #
  toc_top = ['TOP NEWS']
  (agent.page / "div.whatsNews ul.newsItem h2 a").each do |a|
    if a.attr('href') =~ /^http:\/\/jp.wsj.com\/article\//
      toc_top << [canonical( a.text.strip ), a.attr( 'href' )]
    end
  end
  toc << toc_top

  #
  # scraping all categories (the first navigation item is skipped)
  #
  (agent.page.root / 'div.wsjMainNav li').drop( 1 ).each do |li|
    a = (li / 'a').first
    next unless a # navigation item without a link

    category_url = a.attr( 'href' )
    toc_cat = [canonical( a.text.strip )]
    begin
      retry_loop( 5 ) do
        agent.get( category_url )
        sleep 1
      end
    rescue
      # The original message interpolated an undefined local, which raised
      # NameError here and hid the real failure.
      News2Kindle.logger.error "cannot get #{category_url}."
      raise
    end

    (agent.page / "div.leadModule" ).remove
    article_links = (agent.page / "div.headlineSummary ul.newsItem h2 a" ).select do |link|
      link.attr('href') =~ /^http:\/\/jp.wsj.com\/article\//
    end
    # at most ten articles per category
    article_links.first( 10 ).each do |link|
      toc_cat << [canonical( link.text.strip ), link.attr( 'href' )]
    end
    toc << toc_cat
  end

  generate_contents( toc, agent )
  yield "#{@dst_dir}/wsj-paid.opf"
end
|
121
|
+
|
122
|
+
private
|
123
|
+
|
124
|
+
# Returns a copy of str in which every U+FF5E (FULLWIDTH TILDE) is replaced
# by U+301C (WAVE DASH); str itself is left untouched.
def canonical( str )
  str.tr( "\uFF5E", "\u301C" ) # for WAVE DASH problem
end
|
127
|
+
|
128
|
+
# Runs the given block, running it again immediately whenever it raises a
# StandardError.
#
# times:: number of attempts allowed; the error of the last attempt is
#         re-raised to the caller.
# Returns the value of the block.
def retry_loop( times )
  failed = 0
  begin
    yield
  rescue => error
    failed += 1
    raise if failed >= times
    News2Kindle.logger.debug error
    News2Kindle.logger.info "#{failed} retry."
    retry
  end
end
|
143
|
+
|
144
|
+
# Returns the opening part of an HTML page (doctype through the <h1>
# heading), one element per line, ending with a newline.
#
# title:: plain-text page title. It is XML-escaped before being placed in
#         <title> and <h1>, so titles such as "M&A" yield valid markup.
def html_header( title )
  safe_title = title.to_s.encode( xml: :text )
  [
    '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">',
    '<html>',
    '<head>',
    '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"></meta>',
    "<title>#{safe_title}</title>",
    '<link rel="stylesheet" href="wsj.css" type="text/css" media="all"></link>',
    '</head>',
    '<body>',
    "<h1>#{safe_title}</h1>",
  ].join( "\n" ) + "\n"
end
|
157
|
+
|
158
|
+
# Returns the document of an article page, using a file cache in @src_dir.
#
# agent:: object used to fetch the page (#get, then #page.root)
# uri::   article URL; its article id names the cache file
# sub::   optional suffix appended to the article id in the cache file name
#
# A cached file is parsed and returned without any network access.
# Otherwise the page is fetched (five attempts, one second pause after each
# successful fetch), stored in the cache and returned. The last fetch error
# is re-raised after being logged.
def get_html_item( agent, uri, sub = nil )
  cache_path = "#{@src_dir}/#{uri2aid( uri )}#{sub}.html"
  if File.exist?( cache_path ) # loading cache
    return Nokogiri( File.open( cache_path, 'r:utf-8', &:read ) )
  end

  document = nil
  begin
    retry_loop( 5 ) do
      agent.get( uri )
      document = agent.page.root
      sleep 1
    end
  rescue
    News2Kindle.logger.error "cannot get #{uri}."
    raise
  end

  File.open( cache_path, 'w:utf-8' ) {|f| f.write( document.to_html ) }
  document
end
|
181
|
+
|
182
|
+
# Extracts the article body from a page and localizes its images.
#
# html:: parsed article page
#
# The body is taken from div#article_story_body; when that is absent the
# slide-show container (div#slideContainer) is used instead. Navigation and
# inset widgets are removed. Every image is downloaded into @dst_dir (once
# per base name) and its src rewritten to the local file name; an image
# that cannot be downloaded keeps its original src and is only logged.
#
# Returns the inner HTML of the selected container.
def scrape_html_item( html )
  contents = (html / 'div#article_story_body')

  if contents.size == 0
    # slide show page: keep the first slide, drop the viewer itself
    contents = (html / 'div#slideContainer')
    if contents.size > 0
      (contents / 'div.dSlideViewer').before((contents / 'div.dSlideViewer li.firstSlide').inner_html)
      (contents / 'div.dSlideViewer, h2.header, ul.nav-inline').remove
    end
  else
    # replace the byline list by its plain text
    signature = (contents / 'ul.socialByline')
    if signature.size > 0
      signature[0].before(signature.inner_text)
      signature.remove
    end
    (contents / 'div.insettipBox , div.insetButton').remove
    (contents / 'div.insetZoomTargetBox a').remove
    (contents / 'div.legacyInset div.embedType-interactive').each {|d| d.parent.remove}
  end

  (contents / 'img').each do |image_tag|
    image_url = image_tag.attr( 'src' )
    next if image_url.nil? || image_url.empty? # nothing to download

    image_file = File::basename( image_url )
    local_path = "#{@dst_dir}/#{image_file}"
    unless File.exist?( local_path )
      begin
        # The URL comes from scraped markup: parse it as a URI instead of
        # handing it to Kernel#open, which treats a leading "|" as a command
        # and no longer opens URLs on Ruby 3. Written in binary mode because
        # the payload is image data.
        File.binwrite( local_path, URI.parse( image_url ).read )
      rescue
        News2Kindle.logger.warn "FAIL TO DOWNLOAD IMAGE: #{image_url}"
        next
      end
    end
    image_tag.set_attribute( "src", image_file )
  end

  contents.inner_html
end
|
220
|
+
|
221
|
+
# Writes the HTML file of one article into @dst_dir and returns the
# table-of-contents entry that links to it.
#
# item::  plain-text headline used as link label (and as page title when
#         the page has no og:title meta tag)
# uri::   article URL
# agent:: object used to fetch the page
#
# Returns a <li> line, or '' when no article id can be derived from uri.
# The label is XML-escaped so headlines containing "&" or "<" do not break
# the generated markup.
def html_item( item, uri, agent )
  aid = uri2aid( uri )
  return '' unless aid
  html = get_html_item( agent, uri )

  og_title = (html / 'meta[@property="og:title"]').first
  title = og_title ? og_title.attr( "content" ).strip : item

  File.open( "#{@dst_dir}/#{aid}.html", 'w:utf-8' ) do |f|
    f.puts canonical( html_header( title ) )
    f.puts scrape_html_item( html )
    f.puts html_footer
  end

  %Q|\t\t<li><a href="#{aid}.html">#{item.to_s.encode( xml: :text )}</a></li>|
end
|
237
|
+
|
238
|
+
# Returns the closing tags of an HTML page, one per line.
def html_footer
  %w[</body> </html>].map {|tag| "#{tag}\n" }.join
end
|
244
|
+
|
245
|
+
# Returns the opening part of the NCX document: XML prolog, <ncx> root,
# document title (@title with @now_str in parentheses), the start of
# <navMap> and the navPoint of the table of contents.
def ncx_header
  lines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">',
    '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">',
    "<docTitle><text>#{@title} (#{@now_str})</text></docTitle>",
    '<navMap>',
    '<navPoint id="toc" playOrder="0"><navLabel><text>Table of Contents</text></navLabel><content src="toc.html" /></navPoint>',
  ]
  # a tab at the start of any line is stripped, as in the original template
  lines.map {|line| "#{line}\n" }.join.gsub( /^\t/, '' )
end
|
255
|
+
|
256
|
+
# Returns the NCX navPoint line of one article.
#
# item::  plain-text headline; XML-escaped before it is placed in
#         <navLabel>, since a raw "&" or "<" would make the NCX malformed
# uri::   article URL; its article id becomes the navPoint id and file name
# index:: value of the playOrder attribute
#
# Returns '' when no article id can be derived from uri.
def ncx_item( item, uri, index )
  aid = uri2aid( uri )
  return '' unless aid

  label = item.to_s.encode( xml: :text )
  %Q|\t\t<navPoint id="#{aid}" playOrder="#{index}"><navLabel><text>#{label}</text></navLabel><content src="#{aid}.html" /></navPoint>|
end
|
260
|
+
|
261
|
+
# Returns the closing tags of the NCX document, one per line.
def ncx_footer
  %w[</navMap> </ncx>].map {|tag| "#{tag}\n" }.join
end
|
267
|
+
|
268
|
+
# Returns the opening part of the OPF package document: the metadata
# section (filled from @title, @now_str, @lang and @now) and the first
# manifest entries (toc.ncx, wsj.css, toc.html). The manifest is left open
# for the article entries.
def opf_header
  lines = [
    '<?xml version="1.0" encoding="utf-8"?>',
    '<package unique-identifier="uid">',
    '<metadata>',
    '<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core" xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/">',
    "<dc:Title>#{@title} (#{@now_str})</dc:Title>",
    "<dc:Language>#{@lang}</dc:Language>",
    '<dc:Creator>The Wall Street Journal Online</dc:Creator>',
    "<dc:Description>#{@title}、#{@now_str}生成</dc:Description>",
    "<dc:Date>#{@now.strftime( '%d/%m/%Y' )}</dc:Date>",
    '</dc-metadata>',
    '<x-metadata>',
    '<output encoding="utf-8" content-type="text/x-oeb1-document"></output>',
    '<EmbeddedCover>wsj.jpg</EmbeddedCover>',
    '</x-metadata>',
    '</metadata>',
    '<manifest>',
    '<item id="toc" media-type="application/x-dtbncx+xml" href="toc.ncx"></item>',
    '<item id="style" media-type="text/css" href="wsj.css"></item>',
    '<item id="index" media-type="text/html" href="toc.html"></item>',
  ]
  # a tab at the start of any line is stripped, as in the original template
  lines.map {|line| "#{line}\n" }.join.gsub( /^\t/, '' )
end
|
291
|
+
|
292
|
+
# Returns the OPF manifest line of the article at uri, or '' when no
# article id can be derived from it.
def opf_item( uri )
  aid = uri2aid( uri )
  return '' unless aid

  %Q|\t\t<item id="#{aid}" media-type="text/html" href="#{aid}.html"></item>|
end
|
296
|
+
|
297
|
+
# Returns the closing part of the OPF package document: end of the
# manifest, the spine and the guide.
#
# aids:: article ids in reading order. Each gets an <itemref> in the spine
#        (followed by the one for the table of contents), and the first id
#        is referenced as the start page of the guide.
def opf_footer( aids )
  spine_refs = aids.map {|aid| %Q|\t<itemref idref="#{aid}" />\n| }.join

  closing_lines = [
    '<itemref idref="index" />',
    '</spine>',
    '<tours></tours>',
    '<guide>',
    '<reference type="toc" title="Table of Contents" href="toc.html"></reference>',
    %Q|<reference type="start" title="Top Story" href="#{aids[0]}.html"></reference>|,
    '</guide>',
    '</package>',
  ]
  # a tab at the start of any line is stripped, as in the original template
  closing = closing_lines.map {|line| "#{line}\n" }.join.gsub( /^\t/, '' )

  "</manifest>\n<spine toc=\"toc\">\n" + spine_refs + closing
end
|
317
|
+
|
318
|
+
# Returns the article id of an article URL: the path segment between
# "/article/" and the "html" suffix. Returns nil when uri does not have
# that form.
def uri2aid( uri )
  uri[%r|/article/([^/]*).html|, 1]
end
|
321
|
+
|
322
|
+
# Writes toc.html, toc.ncx and wsj-paid.opf into @dst_dir, and through
# html_item one HTML file per article.
#
# toc::   Array of categories; each category is an Array whose String
#         elements are headings and whose other elements are
#         [label, url] pairs of articles
# agent:: object used to fetch article pages (handed on to html_item)
#
# Every heading after the first starts on a new page of the table of
# contents. An article listed in several categories appears in each of them
# in toc.html and toc.ncx, but only once in the OPF manifest and spine.
def generate_contents( toc, agent )
  File.open( "#{@dst_dir}/toc.html", 'w:utf-8' ) do |toc_html|
    File.open( "#{@dst_dir}/toc.ncx", 'w:utf-8' ) do |ncx|
      File.open( "#{@dst_dir}/wsj-paid.opf", 'w:utf-8' ) do |opf|
        heading_written = false
        play_order = 0
        known_aids = []

        ncx.puts ncx_header
        opf.puts opf_header

        toc.each do |category|
          category.each do |article|
            if article.instance_of?( String )
              # heading: open the page on the first one, otherwise close
              # the previous list and start a new page
              if heading_written
                toc_html.puts "\t</ul>\n\t<mbp:pagebreak />"
              else
                toc_html.puts html_header( 'Table of Contents' )
                heading_written = true
              end
              toc_html.puts "\t<h2>#{article}</h2>"
              toc_html.puts "\t<ul>"
            else
              label, link = article[0], article[1]
              aid = uri2aid( link )
              toc_html.puts html_item( label, link, agent )
              ncx.puts ncx_item( label, link, play_order += 1 )
              next if known_aids.include?( aid )
              opf.puts opf_item( link )
              known_aids << aid if aid
            end
          end
        end

        toc_html.puts "\t</ul>"
        toc_html.puts html_footer
        ncx.puts ncx_footer
        opf.puts opf_footer( known_aids )
      end
    end
  end
end
|
358
|
+
end
|
359
|
+
end
|
360
|
+
end
|