aozoragen 0.1.9 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -30,16 +30,33 @@ module Aozoragen
30
30
  book_title = metainfo[:title]
31
31
  (@index_html / 'ul.btnList li.withDate a' ).each do |a|
32
32
  uri = @index_uri + a.attr( :href )
33
- text = get_content( uri, book_title ).normalize_char
34
- yield( {id: Pathname( uri.path ).dirname.basename.to_s, uri: uri, text: text} )
33
+ get_content( uri, book_title ) do |u, t|
34
+ text = t.normalize_char
35
+ chap_id = "#{Pathname( u.path ).dirname.basename}-#{Pathname(u.path).basename('.html')}"
36
+ yield( {id: chap_id, uri: u, text: text} )
37
+ end
35
38
  end
36
39
  end
37
40
 
38
41
  def get_content( uri, book_title = '' )
42
+ text, html = get_page_content( uri, book_title )
43
+ yield uri, text
44
+
45
+ [].tap{|urls|
46
+ (html / 'ul.pageLink li a').each{|a| urls << (uri + a.attr( 'href' )) }
47
+ }.sort.uniq.each do |uri|
48
+ text, html = get_page_content( uri, book_title )
49
+ yield uri, text
50
+ end
51
+ end
52
+
53
+ private
54
+ def get_page_content( uri, book_title )
39
55
  text = ''
40
56
  html = open( uri, 'r:CP932', &:read ).encode( 'UTF-8' )
41
57
  html = html.gsub( /\&mdash;/, "\u2500" ).gsub( /\&quot;/, "\u201D" )
42
- (Nokogiri( html ) / 'div#mainContent' ).each do |content|
58
+ dom = Nokogiri( html )
59
+ (dom / 'div#mainContent' ).each do |content|
43
60
  (content / 'h3').each do |t|
44
61
  text << t.text.sub( /^『#{book_title}』 /, '' ).subhead
45
62
  end
@@ -50,7 +67,7 @@ module Aozoragen
50
67
  end
51
68
  end
52
69
  text << "[#改ページ]\n"
53
- text.for_tategaki
70
+ return [text.for_tategaki, dom]
54
71
  end
55
72
  end
56
73
  end
@@ -1,3 +1,3 @@
1
1
  module Aozoragen
2
- VERSION = "0.1.9"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aozoragen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-27 00:00:00.000000000 Z
12
+ date: 2012-05-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri