news2kindle 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +3 -0
- data/.tachikoma.yml +1 -0
- data/.travis.yml +18 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +119 -0
- data/README.md +59 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/bin/test-generator +21 -0
- data/exe/news2kindle +107 -0
- data/lib/news2kindle.rb +12 -0
- data/lib/news2kindle/dup_checker.rb +41 -0
- data/lib/news2kindle/generator/internet-watch.rb +236 -0
- data/lib/news2kindle/generator/nikkei-free.rb +18 -0
- data/lib/news2kindle/generator/nikkei-paid.rb +352 -0
- data/lib/news2kindle/generator/tdiary.rb +135 -0
- data/lib/news2kindle/generator/wsj-paid.rb +360 -0
- data/lib/news2kindle/generator/wsjus-paid.rb +90 -0
- data/lib/news2kindle/task.rb +116 -0
- data/lib/news2kindle/version.rb +3 -0
- data/news2kindle.gemspec +37 -0
- data/news2kindle.yaml.sample +31 -0
- data/resource/internet-watch.css +27 -0
- data/resource/internet-watch.jpg +0 -0
- data/resource/nikkei.css +43 -0
- data/resource/nikkei.jpg +0 -0
- data/resource/tdiary.css +27 -0
- data/resource/wsj-us.jpg +0 -0
- data/resource/wsj.css +19 -0
- data/resource/wsj.jpg +0 -0
- metadata +245 -0
@@ -0,0 +1,135 @@
|
|
1
|
+
# scraping tDiary's N-Year diary for News2Kindle
|
2
|
+
#
|
3
|
+
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'open-uri'
|
6
|
+
require 'uri'
|
7
|
+
|
8
|
+
module News2Kindle
|
9
|
+
module Generator
|
10
|
+
class Tdiary
|
11
|
+
# Set up the generator's working directory.
#
# tmpdir:: directory that will receive the generated files. The bundled
#          stylesheet is copied into it; the source path is relative to
#          the process's current directory.
def initialize( tmpdir )
	stylesheet = "./resource/tdiary.css"
	FileUtils.cp( stylesheet, tmpdir )
	@current_dir = tmpdir
end
|
15
|
+
|
16
|
+
# Download the diary page for the given date and turn it into a Kindle
# source set (index.html, toc.ncx, tdiary.opf) inside @current_dir.
#
# opts:: Hash with
#        :now        - object responding to #strftime; its month/day select
#                      the page (sent as "?date=MMDD")
#        :tdiary_top - top URL of the diary; ENV['TDIARY_TOP'] is used when
#                      the key is absent
#
# Yields the path of the generated OPF file.
# Re-raises the last fetch error when no page could be downloaded at all.
def generate(opts)
	now = opts[:now]
	@top = opts[:tdiary_top] || ENV['TDIARY_TOP']

	html = title = author = nil
	begin
		retry_loop( 5 ) do
			# URI#open comes from open-uri and, unlike Kernel#open, only ever
			# performs a URL fetch.
			page_uri = URI.parse( "#{@top}?date=#{now.strftime '%m%d'}" )
			html = Nokogiri( page_uri.open( 'r:utf-8', &:read ) )
			title = (html / 'head title').text
			author = (html / 'head meta[name="author"]')[0]['content']
		end
	rescue => e
		News2Kindle.logger.info "failed by retry over: #{e.class}: #{e}"
		# Without a parsed page nothing below can work; surface the real
		# error instead of a NoMethodError on nil.
		raise if html.nil?
	end

	# title/author are plain text taken from the page, so they must be
	# escaped before being embedded into the XML files below.
	xml_title = title.to_s.encode( :xml => :text )
	xml_author = author.to_s.encode( :xml => :text )

	#
	# generating html
	#
	html.css('head meta', 'head link', 'head style', 'script').remove
	html.css('div.adminmenu', 'div.sidebar', 'div.footer').remove
	(html / 'img').each do |img|
		next unless img['src'] # nothing to download for a src-less <img>
		img['src'] = save_image(img['src'])
	end
	File.open( "#{@current_dir}/index.html", 'w' ){|f| f.write html.to_html}

	#
	# generating TOC in ncx
	#
	File.open( "#{@current_dir}/toc.ncx", 'w:utf-8' ) do |f|
		f.write <<-XML.gsub( /^\t/, '' )
	<?xml version="1.0" encoding="UTF-8"?>
	<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
	<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
	<docTitle><text>#{xml_title}</text></docTitle>
	<navMap>
	<navPoint id="index" playOrder="1">
	<navLabel>
	<text>#{xml_title}</text>
	</navLabel>
	<content src="index.html" />
	</navPoint>
	</navMap>
	</ncx>
		XML
	end

	#
	# generating OPF
	#
	File.open( "#{@current_dir}/tdiary.opf", 'w:utf-8' ) do |f|
		f.write <<-XML.gsub( /^\t/, '' )
	<?xml version="1.0" encoding="utf-8"?>
	<package unique-identifier="uid">
	<metadata>
	<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core" xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/">
	<dc:Title>#{xml_title}</dc:Title>
	<dc:Language>ja-JP</dc:Language>
	<dc:Creator>#{xml_author}</dc:Creator>
	<dc:Description>tDiary N-Year Diary</dc:Description>
	<dc:Date>#{now.strftime( '%d/%m/%Y' )}</dc:Date>
	</dc-metadata>
	</metadata>
	<manifest>
	<item id="toc" media-type="application/x-dtbncx+xml" href="toc.ncx"></item>
	<item id="style" media-type="text/css" href="tdiary.css"></item>
	<item id="index" media-type="text/html" href="index.html"></item>
	</manifest>
	<spine toc="toc">
	<itemref idref="index" />
	</spine>
	<tours></tours>
	<guide>
	<reference type="start" title="Start Page" href="index.html"></reference>
	</guide>
	</package>
		XML
	end

	yield "#{@current_dir}/tdiary.opf"
end
|
98
|
+
|
99
|
+
private
|
100
|
+
|
101
|
+
# Run the given block, re-running it after a one second pause whenever it
# raises a StandardError.
#
# times:: maximum number of attempts; the error of the last attempt is
#         re-raised to the caller.
#
# Returns the value of the block.
def retry_loop( times )
	attempts = 0
	begin
		yield
	rescue
		attempts += 1
		raise if attempts >= times

		News2Kindle.logger.debug $!
		News2Kindle.logger.info "#{attempts} retry."
		sleep 1
		retry
	end
end
|
117
|
+
|
118
|
+
# Download one image into @current_dir under a random file name.
#
# img:: value of an <img src>; anything not starting with http(s): is
#       resolved against @top.
#
# Returns the local file name (random hex plus the extension of the URL
# path). Download problems are logged as warnings and the name is still
# returned, so one broken image does not abort the whole book.
def save_image(img)
	require 'securerandom'

	# URI.join applies standard relative-reference resolution, which also
	# handles root-relative ("/images/a.png") sources.
	uri = /^https?:/ =~ img ? URI(img) : URI.join(@top, img)
	# Take the extension from the path only, so query strings and dots in
	# the host name never leak into the file name.
	file_name = "#{SecureRandom.hex}#{File.extname(uri.path.to_s)}"
	begin
		# Fetch first, write second: a failed download leaves no empty file.
		data = uri.open(&:read)
		File.open("#{@current_dir}/#{file_name}", 'wb') {|f| f.write data}
	rescue StandardError => e
		News2Kindle.logger.warn "#{e}: #{uri}"
	end
	return file_name
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
@@ -0,0 +1,360 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# scraping jp.wsj.com for Kindlizer
|
4
|
+
#
|
5
|
+
|
6
|
+
require 'mechanize'
|
7
|
+
require 'nokogiri'
|
8
|
+
require 'open-uri'
|
9
|
+
require 'tmpdir'
|
10
|
+
require 'pathname'
|
11
|
+
require 'json'
|
12
|
+
|
13
|
+
module News2Kindle
|
14
|
+
module Generator
|
15
|
+
class WsjPaid
|
16
|
+
TOP = 'http://jp.wsj.com'
|
17
|
+
LOGIN = "https://id.wsj.com/access/pages/wsj/jp/login_standalone.html"
|
18
|
+
|
19
|
+
# Load the WSJ credentials and prepare the working directories.
#
# Credentials come from Pit (entry 'wsj') when the pit library can be
# loaded, otherwise from the WSJ_ID / WSJ_PW environment variables.
#
# tmpdir:: base directory; "src" and "dst" are created below it when
#          missing, and the bundled cover image and stylesheet are copied
#          into "dst".
def initialize( tmpdir )
	begin
		require 'pit'
		account = Pit::get( 'wsj', :require => {
			'user' => 'your ID of WSJ.',
			'pass' => 'your Password of WSJ.',
		} )
		@wsj_id, @wsj_pw = account['user'], account['pass']
	rescue LoadError # no pit library, using environment variables
		@wsj_id, @wsj_pw = ENV['WSJ_ID'], ENV['WSJ_PW']
	end

	@current_dir = tmpdir
	@src_dir = @current_dir + '/src'
	@dst_dir = @current_dir + '/dst'
	[@src_dir, @dst_dir].each do |dir|
		Dir.mkdir( dir ) unless File.exist?( dir )
	end

	%w(wsj.jpg wsj.css).each do |resource|
		FileUtils.cp( "./resource/#{resource}", @dst_dir )
	end
end
|
43
|
+
|
44
|
+
# Log in to WSJ Japan, collect the headline links of the top page and of
# every category page, and generate the book sources through
# generate_contents.
#
# opts:: Hash; opts[:now] must respond to #strftime and is used as the
#        timestamp of the book.
#
# Yields the path of the generated OPF file.
# Re-raises the fetch error when a category page cannot be loaded after
# retries.
def generate(opts)
	@now = opts[:now]
	@now_str = @now.strftime '%Y-%m-%d %H:%M'
	@title = "WSJ日本版"
	@lang = "ja-JP"

	agent = Mechanize::new
	proxy = ENV['HTTP_PROXY']
	if proxy
		if proxy =~ %r|\A\w+://|
			# URL form ("http://user:pass@host:port"); a plain split on ":"
			# would hand "http" to Mechanize as the host name.
			proxy_uri = URI.parse( proxy )
			agent.set_proxy( proxy_uri.host, proxy_uri.port, proxy_uri.user, proxy_uri.password )
		else
			# bare "host:port" form
			agent.set_proxy( *proxy.split( /:/ ) )
		end
	end

	toc = []
	article_link = %r|^http://jp\.wsj\.com/article/|

	#
	# login
	#
	agent.get(LOGIN)
	form = agent.page.forms.first
	form.action = ('https://id.wsj.com/auth/submitlogin.json')
	form['username'] = @wsj_id
	form['password'] = @wsj_pw
	form.submit

	# the login endpoint answers with JSON whose "url" entry is visited next
	response = JSON.parse(agent.page.body)
	agent.get( response["url"] )

	agent.get( TOP + "/home-page?_wsjregion=asia,jp&_homepage=/home/jp")

	#
	# scraping top news
	#
	toc_top = ['TOP NEWS']
	(agent.page / "div.whatsNews ul.newsItem h2 a").each do |a|
		if a.attr('href') =~ article_link
			toc_top << [canonical( a.text.strip ), a.attr( 'href' )]
		end
	end
	toc << toc_top

	#
	# scraping all categories (the first navigation entry is skipped)
	#
	(agent.page.root / 'div.wsjMainNav li').drop( 1 ).each do |li|
		a = (li / 'a').first
		next unless a # navigation entry without a link

		category_uri = a.attr( 'href' )
		toc_cat = [canonical( a.text.strip )]
		begin
			retry_loop( 5 ) do
				agent.get( category_uri )
				sleep 1
			end
		rescue
			News2Kindle.logger.error "cannot get #{category_uri}."
			raise
		end

		(agent.page / "div.leadModule" ).remove
		headlines = (agent.page / "div.headlineSummary ul.newsItem h2 a" ).select do |link|
			link.attr( 'href' ) =~ article_link
		end
		# at most 10 articles per category
		headlines.first( 10 ).each do |link|
			toc_cat << [canonical( link.text.strip ), link.attr( 'href' )]
		end
		toc << toc_cat
	end

	generate_contents( toc, agent )
	yield "#{@dst_dir}/wsj-paid.opf"
end
|
121
|
+
|
122
|
+
private
|
123
|
+
|
124
|
+
# Return a copy of str in which every U+FF5E (FULLWIDTH TILDE) is replaced
# by U+301C (WAVE DASH) -- workaround for the WAVE DASH problem.
def canonical( str )
	str.tr( "\uFF5E", "\u301C" )
end
|
127
|
+
|
128
|
+
# Run the given block, immediately re-running it whenever it raises a
# StandardError.
#
# times:: maximum number of attempts; the error of the last attempt is
#         re-raised to the caller.
#
# Returns the value of the block.
def retry_loop( times )
	attempts = 0
	begin
		yield
	rescue
		attempts += 1
		raise if attempts >= times

		News2Kindle.logger.debug $!
		News2Kindle.logger.info "#{attempts} retry."
		retry
	end
end
|
143
|
+
|
144
|
+
# Build the opening part of an HTML page (doctype, head and <h1>).
#
# title:: plain-text page title; it is escaped here, so callers pass the
#         raw text.
#
# Returns the markup up to and including the <h1> line.
def html_header( title )
	# the title is scraped text and may contain &, < or >
	safe_title = title.to_s.encode( :xml => :text )
	<<-HTML.gsub( /^\t/, '' )
	<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
	<html>
	<head>
	<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"></meta>
	<title>#{safe_title}</title>
	<link rel="stylesheet" href="wsj.css" type="text/css" media="all"></link>
	</head>
	<body>
	<h1>#{safe_title}</h1>
	HTML
end
|
157
|
+
|
158
|
+
# Return the parsed HTML of one article, using a file cache in @src_dir.
#
# agent:: Mechanize agent used for the download
# uri::   article URL; its article id names the cache file
# sub::   optional suffix appended to the cache file name
#
# A cached copy is parsed and returned when present; otherwise the page is
# fetched (up to 5 attempts), stored in the cache and returned.
# Re-raises the fetch error after logging it.
def get_html_item( agent, uri, sub = nil )
	cache_path = "#{@src_dir}/#{uri2aid( uri )}#{sub}.html"

	# loading cache
	return Nokogiri( open( cache_path, 'r:utf-8', &:read ) ) if File.exist?( cache_path )

	page = nil
	begin
		retry_loop( 5 ) do
			agent.get( uri )
			page = agent.page.root
			sleep 1
		end
	rescue
		News2Kindle.logger.error "cannot get #{uri}."
		raise
	end

	open( cache_path, 'w:utf-8' ) {|f| f.write( page.to_html ) }
	page
end
|
181
|
+
|
182
|
+
# Extract the article body from a parsed page, strip unwanted widgets and
# localize its images.
#
# html:: parsed article page (Nokogiri node/document)
#
# Every <img> is downloaded into @dst_dir (unless a file of that name is
# already there) and its src is rewritten to the local file name. Images
# that cannot be downloaded are logged and left untouched.
#
# Returns the inner HTML of the extracted body.
def scrape_html_item( html )
	contents = (html / 'div#article_story_body')

	if(contents.size == 0)
		# no regular story body: fall back to the slide container markup
		contents = (html / 'div#slideContainer')
		if(contents.size > 0)
			(contents / 'div.dSlideViewer').before((contents / 'div.dSlideViewer li.firstSlide').inner_html)
			(contents / 'div.dSlideViewer, h2.header, ul.nav-inline').remove
		end
	else
		# replace the byline list by its plain text
		signature = (contents / 'ul.socialByline')
		if(signature.size > 0)
			signature[0].before(signature.inner_text)
			signature.remove
		end
		(contents / 'div.insettipBox , div.insetButton').remove
		(contents / 'div.insetZoomTargetBox a').remove
		(contents / 'div.legacyInset div.embedType-interactive').each {|d| d.parent.remove}
	end

	(contents / 'img').each do |image_tag|
		image_url = image_tag.attr( 'src' ).to_s
		next if image_url.empty? # <img> without a source

		image_file = File::basename( image_url )
		local_path = "#{@dst_dir}/#{image_file}"
		begin
			unless File.exist?( local_path )
				# The URL comes from a scraped page. URI#open (open-uri) can only
				# fetch a URL, whereas Kernel#open would also run "|command"
				# strings or read local files.
				image = URI.parse( image_url ).open( &:read )
				File.open( local_path, 'wb' ){|fp| fp.write image}
			end
			image_tag.set_attribute("src", image_file)
		rescue
			News2Kindle.logger.warn "FAIL TO DOWNLOAD IMAGE: #{image_url}"
		end
	end

	contents.inner_html
end
|
220
|
+
|
221
|
+
# Generate the HTML file of one article in @dst_dir and return the
# table-of-contents entry pointing at it.
#
# item::  plain-text headline (used as link text and as fallback title)
# uri::   article URL
# agent:: Mechanize agent handed on to get_html_item
#
# Returns an <li> line for toc.html, or '' when the URL has no article id.
def html_item( item, uri, agent )
	aid = uri2aid( uri )
	return '' unless aid
	html = get_html_item( agent, uri )

	File.open( "#{@dst_dir}/#{aid}.html", 'w:utf-8' ) do |f|
		# prefer the page's og:title, fall back to the headline text
		title_tag = (html / 'meta[@property="og:title"]')
		title = title_tag.size > 0 ? title_tag[0].attr("content").strip : item
		f.puts canonical( html_header( title ) )

		f.puts scrape_html_item(html)
		f.puts html_footer
	end

	# the headline is plain text and must be escaped inside the markup
	%Q|\t\t<li><a href="#{aid}.html">#{item.to_s.encode( :xml => :text )}</a></li>|
end
|
237
|
+
|
238
|
+
# Closing markup matching html_header: ends <body> and <html>.
def html_footer
	"</body>\n</html>\n"
end
|
244
|
+
|
245
|
+
# Opening part of toc.ncx: XML prolog, document title built from @title and
# @now_str, the start of <navMap> and the navPoint of the table of contents.
def ncx_header
	[
		%Q|<?xml version="1.0" encoding="UTF-8"?>|,
		%Q|<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">|,
		%Q|<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">|,
		%Q|<docTitle><text>#{@title} (#{@now_str})</text></docTitle>|,
		%Q|<navMap>|,
		%Q|<navPoint id="toc" playOrder="0"><navLabel><text>Table of Contents</text></navLabel><content src="toc.html" /></navPoint>|,
	].map {|line| "#{line}\n" }.join
end
|
255
|
+
|
256
|
+
# Build the NCX navPoint of one article.
#
# item::  plain-text headline used as the navigation label
# uri::   article URL; its article id becomes the navPoint id and file name
# index:: value of the playOrder attribute
#
# Returns the navPoint line, or '' when the URL has no article id.
def ncx_item( item, uri, index )
	aid = uri2aid( uri )
	return '' unless aid

	# the headline is plain text; escape it for use inside XML
	label = item.to_s.encode( :xml => :text )
	%Q|\t\t<navPoint id="#{aid}" playOrder="#{index}"><navLabel><text>#{label}</text></navLabel><content src="#{aid}.html" /></navPoint>|
end
|
260
|
+
|
261
|
+
# Closing markup of toc.ncx: ends <navMap> and <ncx>.
def ncx_footer
	"</navMap>\n</ncx>\n"
end
|
267
|
+
|
268
|
+
# Opening part of the OPF package file: metadata built from @title, @lang,
# @now_str and @now, followed by the start of <manifest> with the fixed
# entries (NCX, stylesheet, table of contents).
def opf_header
	[
		%Q|<?xml version="1.0" encoding="utf-8"?>|,
		%Q|<package unique-identifier="uid">|,
		%Q|<metadata>|,
		%Q|<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core" xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/">|,
		%Q|<dc:Title>#{@title} (#{@now_str})</dc:Title>|,
		%Q|<dc:Language>#{@lang}</dc:Language>|,
		%Q|<dc:Creator>The Wall Street Journal Online</dc:Creator>|,
		%Q|<dc:Description>#{@title}、#{@now_str}生成</dc:Description>|,
		%Q|<dc:Date>#{@now.strftime( '%d/%m/%Y' )}</dc:Date>|,
		%Q|</dc-metadata>|,
		%Q|<x-metadata>|,
		%Q|<output encoding="utf-8" content-type="text/x-oeb1-document"></output>|,
		%Q|<EmbeddedCover>wsj.jpg</EmbeddedCover>|,
		%Q|</x-metadata>|,
		%Q|</metadata>|,
		%Q|<manifest>|,
		%Q|<item id="toc" media-type="application/x-dtbncx+xml" href="toc.ncx"></item>|,
		%Q|<item id="style" media-type="text/css" href="wsj.css"></item>|,
		%Q|<item id="index" media-type="text/html" href="toc.html"></item>|,
	].map {|line| "#{line}\n" }.join
end
|
291
|
+
|
292
|
+
# Build the OPF manifest entry of one article.
#
# uri:: article URL; its article id becomes the item id and file name.
#
# Returns the <item> line, or '' when the URL has no article id.
def opf_item( uri )
	aid = uri2aid( uri )
	return '' unless aid

	%Q|\t\t<item id="#{aid}" media-type="text/html" href="#{aid}.html"></item>|
end
|
296
|
+
|
297
|
+
# Closing part of the OPF package file: ends <manifest>, lists the reading
# order in <spine> (all articles, then the table of contents) and writes
# the <guide>.
#
# aids:: article ids in reading order
#
# The "start" guide reference points at the first article; with no
# articles at all it falls back to toc.html instead of an empty file name.
def opf_footer( aids )
	start_page = aids.empty? ? 'toc.html' : "#{aids[0]}.html"

	r = <<-XML.gsub( /^\t/, '' )
	</manifest>
	<spine toc="toc">
	XML
	aids.each do |aid|
		r << %Q|\t<itemref idref="#{aid}" />\n|
	end
	r << <<-XML.gsub( /^\t/, '' )
	<itemref idref="index" />
	</spine>
	<tours></tours>
	<guide>
	<reference type="toc" title="Table of Contents" href="toc.html"></reference>
	<reference type="start" title="Top Story" href="#{start_page}"></reference>
	</guide>
	</package>
	XML
	r
end
|
317
|
+
|
318
|
+
# Extract the article id from an article URL.
#
# uri:: URL string containing "/article/<id>.html"
#
# Returns the id, or nil when the URL does not have that shape.
def uri2aid( uri )
	# the dot is escaped so that only a literal ".html" suffix matches
	uri[%r|/article/([^/]*)\.html|, 1]
end
|
321
|
+
|
322
|
+
# Write toc.html, toc.ncx and wsj-paid.opf into @dst_dir and, through
# html_item, one HTML file per article.
#
# toc::   Array of categories; each category is an Array whose String
#         elements are headings and whose [title, uri] pairs are articles
# agent:: Mechanize agent handed on to html_item
#
# Articles whose URL has no article id are skipped entirely, so playOrder
# values stay contiguous. An article listed more than once gets one
# manifest/spine entry only.
def generate_contents( toc, agent )
	File.open( "#{@dst_dir}/toc.html", 'w:utf-8' ) do |html|
		File.open( "#{@dst_dir}/toc.ncx", 'w:utf-8' ) do |ncx|
			File.open( "#{@dst_dir}/wsj-paid.opf", 'w:utf-8' ) do |opf|
				first = true
				toc_index = 0
				aids = []
				ncx.puts ncx_header
				opf.puts opf_header
				toc.each do |category|
					category.each do |article|
						if article.is_a?( String )
							# category heading: open the page on the first one, otherwise
							# close the previous list and start a new page
							html.puts first ?
								html_header( 'Table of Contents' ) :
								"\t</ul>\n\t<mbp:pagebreak />"
							html.puts "\t<h2>#{article.encode( :xml => :text )}</h2>"
							html.puts "\t<ul>"
							first = false
							next
						end

						title, uri = article
						aid = uri2aid( uri )
						next unless aid # not an article page

						html.puts html_item( title, uri, agent )
						ncx.puts ncx_item( title, uri, toc_index += 1 )
						next if aids.include?( aid )

						opf.puts opf_item( uri )
						aids << aid
					end
				end
				html.puts "\t</ul>"
				html.puts html_footer
				ncx.puts ncx_footer
				opf.puts opf_footer( aids )
			end
		end
	end
end
|
358
|
+
end
|
359
|
+
end
|
360
|
+
end
|