aozoragen 0.1.9 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/aozoragen/renzaburo.rb +21 -4
- data/lib/aozoragen/version.rb +1 -1
- metadata +2 -2
data/lib/aozoragen/renzaburo.rb
CHANGED
@@ -30,16 +30,33 @@ module Aozoragen
|
|
30
30
|
book_title = metainfo[:title]
|
31
31
|
(@index_html / 'ul.btnList li.withDate a' ).each do |a|
|
32
32
|
uri = @index_uri + a.attr( :href )
|
33
|
-
|
34
|
-
|
33
|
+
get_content( uri, book_title ) do |u, t|
|
34
|
+
text = t.normalize_char
|
35
|
+
chap_id = "#{Pathname( u.path ).dirname.basename}-#{Pathname(u.path).basename('.html')}"
|
36
|
+
yield( {id: chap_id, uri: u, text: text} )
|
37
|
+
end
|
35
38
|
end
|
36
39
|
end
|
37
40
|
|
38
41
|
def get_content( uri, book_title = '' )
|
42
|
+
text, html = get_page_content( uri, book_title )
|
43
|
+
yield uri, text
|
44
|
+
|
45
|
+
[].tap{|urls|
|
46
|
+
(html / 'ul.pageLink li a').each{|a| urls << (uri + a.attr( 'href' )) }
|
47
|
+
}.sort.uniq.each do |uri|
|
48
|
+
text, html = get_page_content( uri, book_title )
|
49
|
+
yield uri, text
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
private
|
54
|
+
def get_page_content( uri, book_title )
|
39
55
|
text = ''
|
40
56
|
html = open( uri, 'r:CP932', &:read ).encode( 'UTF-8' )
|
41
57
|
html = html.gsub( /\&mdash;/, "\u2500" ).gsub( /\&quot;/, "\u201D" )
|
42
|
-
|
58
|
+
dom = Nokogiri( html )
|
59
|
+
(dom / 'div#mainContent' ).each do |content|
|
43
60
|
(content / 'h3').each do |t|
|
44
61
|
text << t.text.sub( /^『#{book_title}』 /, '' ).subhead
|
45
62
|
end
|
@@ -50,7 +67,7 @@ module Aozoragen
|
|
50
67
|
end
|
51
68
|
end
|
52
69
|
text << "[#改ページ]\n"
|
53
|
-
text.for_tategaki
|
70
|
+
return [text.for_tategaki, dom]
|
54
71
|
end
|
55
72
|
end
|
56
73
|
end
|
data/lib/aozoragen/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aozoragen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-05-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|