aozoragen 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -30,16 +30,33 @@ module Aozoragen
30
30
  book_title = metainfo[:title]
31
31
  (@index_html / 'ul.btnList li.withDate a' ).each do |a|
32
32
  uri = @index_uri + a.attr( :href )
33
- text = get_content( uri, book_title ).normalize_char
34
- yield( {id: Pathname( uri.path ).dirname.basename.to_s, uri: uri, text: text} )
33
+ get_content( uri, book_title ) do |u, t|
34
+ text = t.normalize_char
35
+ chap_id = "#{Pathname( u.path ).dirname.basename}-#{Pathname(u.path).basename('.html')}"
36
+ yield( {id: chap_id, uri: u, text: text} )
37
+ end
35
38
  end
36
39
  end
37
40
 
38
41
  def get_content( uri, book_title = '' )
42
+ text, html = get_page_content( uri, book_title )
43
+ yield uri, text
44
+
45
+ [].tap{|urls|
46
+ (html / 'ul.pageLink li a').each{|a| urls << (uri + a.attr( 'href' )) }
47
+ }.sort.uniq.each do |uri|
48
+ text, html = get_page_content( uri, book_title )
49
+ yield uri, text
50
+ end
51
+ end
52
+
53
+ private
54
+ def get_page_content( uri, book_title )
39
55
  text = ''
40
56
  html = open( uri, 'r:CP932', &:read ).encode( 'UTF-8' )
41
57
  html = html.gsub( /\&mdash;/, "\u2500" ).gsub( /\&quot;/, "\u201D" )
42
- (Nokogiri( html ) / 'div#mainContent' ).each do |content|
58
+ dom = Nokogiri( html )
59
+ (dom / 'div#mainContent' ).each do |content|
43
60
  (content / 'h3').each do |t|
44
61
  text << t.text.sub( /^『#{book_title}』 /, '' ).subhead
45
62
  end
@@ -50,7 +67,7 @@ module Aozoragen
50
67
  end
51
68
  end
52
69
  text << "[#改ページ]\n"
53
- text.for_tategaki
70
+ return [text.for_tategaki, dom]
54
71
  end
55
72
  end
56
73
  end
@@ -1,3 +1,3 @@
1
1
  module Aozoragen
2
- VERSION = "0.1.9"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aozoragen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-27 00:00:00.000000000 Z
12
+ date: 2012-05-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri