aozoragen 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 531310930df10ff69b5c5804afc7f935f3014646
4
+ data.tar.gz: 2370bb692b45d2ecde94c9a7c62913a0d748376d
5
+ SHA512:
6
+ metadata.gz: f626776afd09a2897f8d124ef6bcd60f393fc4dd4a68ee5f1a9295be9ae54b8f85094c1d4577e8c00ba0600ca6f2c5e686ae7b097aadee25fe5332279d67a3d2
7
+ data.tar.gz: cf3b020e6d721f465c060d53d391cef74bbb00d670f24ff9309ed216d777061d953a28614aed6d62382e8eaf6325b54700559780daa948a8f3f45689fd53f92f
data/README.md CHANGED
@@ -21,6 +21,8 @@ Gemを使ってインストールする:
21
21
  * 実行時に無償公開中の章のみが抽出される。
22
22
  * レンザブロー <http://renzaburo.jp/>
23
23
  * 指定例: http://renzaburo.jp/contents_t/061-katano/index.html
24
+ * 小説を読もう! <http://yomou.syosetu.com/>
25
+ * 指定例: http://ncode.syosetu.com/n8725k/
24
26
  * Webミステリーズ! <http://www.webmysteries.jp/>
25
27
  * Webミステリーズ!の掲載作品には目次ページがないため、GitHubのWikiで代用する。
26
28
  * h1要素に書名、h2要素に続くリストで著者名、h3要素に続くリストで連載各回のURLを表現する。
@@ -46,6 +46,8 @@ ARGV.each do |u|
46
46
  'sai-zen-sen'
47
47
  when 'renzaburo.jp'
48
48
  'renzaburo'
49
+ when 'ncode.syosetu.com'
50
+ 'syosetu'
49
51
  when 'github.com'
50
52
  case Pathname( uri.path ).basename.to_s.sub( %r|(.*?)-.*$|, '\1' )
51
53
  when 'webmysteries'
@@ -0,0 +1,46 @@
1
+ # -*- coding: utf-8; -*-
2
+ #
3
+ # scraping yomou.syosetu.com
4
+ #
5
+ require 'aozoragen/util'
6
+ require 'open-uri'
7
+ require 'pathname'
8
+
9
module Aozoragen
  # Scraper for novels published on yomou.syosetu.com.
  #
  # Given the URI of a novel's index page (for example
  # http://ncode.syosetu.com/n8725k/), it exposes the book metadata and
  # yields every chapter as text.
  class Syosetu
    include Util

    # Downloads and parses the index page.
    #
    # @param index_uri [URI] URI object of the index page. A plain String is
    #   not enough: the instance calls +#path+, +#++ and +#open+ on it.
    def initialize(index_uri)
      @index_uri = index_uri
      # URI#open (provided by open-uri) is used instead of Kernel#open:
      # Kernel#open stopped accepting URIs in Ruby 3.0, while URI#open takes
      # the same open-uri code path on every Ruby version.
      @index_html = Nokogiri(@index_uri.open('r:utf-8', &:read))
    end

    # Collects the book metadata from the index page.
    #
    # @return [Hash] +:id+ (last path segment of the index URI), +:author+
    #   (one-element Array with the writer name) and +:title+ (text of the
    #   page's +title+ element).
    # @raise [NoMethodError] when the page has no +title+ element or no
    #   +.novel_writername a+ link.
    def metainfo
      info = {:id => Pathname(@index_uri.path).basename.to_s, :author => []}
      info[:title] = (@index_html / 'title')[0].text
      info[:author] << (@index_html / '.novel_writername a')[0].text
      info
    end

    # Downloads every chapter linked from the index page, in page order.
    #
    # @yieldparam chapter [Hash] +:id+ (zero-padded three-digit chapter
    #   number taken from the chapter URI), +:uri+ (absolute chapter URI) and
    #   +:text+ (result of #get_chapter_text).
    # @return [Enumerator] when called without a block, so that nothing is
    #   downloaded before a consumer exists.
    def each_chapter
      return enum_for(:each_chapter) unless block_given?

      (@index_html / '.subtitle a').each do |a|
        # Chapter links may be relative; resolve them against the index URI.
        uri = @index_uri + a.attr('href')

        chapter = Nokogiri(uri.open('r:utf-8', &:read))
        text = get_chapter_text(chapter)
        chapter_id = '%03d' % Pathname(uri.path).basename.to_s.to_i
        yield({id: chapter_id, uri: uri, text: text})
      end
    end

    # Builds the text of one chapter page.
    #
    # The chapter heading (+.novel_subtitle+) comes first, followed by every
    # +#novel_honbun+ body with doubled newlines collapsed; each body is
    # terminated by the "[#改ページ]" marker.
    #
    # +subhead+, +detag+, +han2zen+ and +for_tategaki+ are helpers defined
    # outside this class (presumably in aozoragen/util -- confirm there).
    #
    # @param chapter [Nokogiri document] parsed chapter page.
    # @return [String] the converted chapter text.
    # @raise [NoMethodError] when the page has no +.novel_subtitle+ element.
    def get_chapter_text(chapter)
      text = ''
      text << (chapter / '.novel_subtitle')[0].text.subhead
      (chapter / '#novel_honbun').each do |page|
        text << detag(page).gsub(/\n\n/, "\n")
        text << "[#改ページ]\n"
      end
      text.han2zen.for_tategaki
    end
  end
end
@@ -20,7 +20,7 @@ class String
20
20
  # replace characters fitting to vertical layout
21
21
  #
22
22
  def for_tategaki
23
- self.tr( '<>−', '∧∨‐' ).han2zen
23
+ self.tr( '<>-“”−', '∧∨―〃〃‐' ).han2zen
24
24
  end
25
25
 
26
26
  ##
@@ -57,9 +57,9 @@ module Aozoragen
57
57
  ruby.inner_html = '|' + ruby.inner_html
58
58
  (ruby / 'rp').each do |rp|
59
59
  case rp.text
60
- when '('
60
+ when /[(\(]/
61
61
  rp.inner_html = '《'
62
- when ')'
62
+ when /[)\)]/
63
63
  rp.inner_html = '》'
64
64
  end
65
65
  end
@@ -1,3 +1,3 @@
1
1
  module Aozoragen
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.1"
3
3
  end
metadata CHANGED
@@ -1,30 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aozoragen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
5
- prerelease:
4
+ version: 0.3.1
6
5
  platform: ruby
7
6
  authors:
8
7
  - TADA Tadashi
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-05-28 00:00:00.000000000 Z
11
+ date: 2014-09-07 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: nokogiri
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - ">="
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - ">="
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0'
30
27
  description: Scraping some Ebook web site and generating AOZORA format text files.
@@ -36,7 +33,7 @@ executables:
36
33
  extensions: []
37
34
  extra_rdoc_files: []
38
35
  files:
39
- - .gitignore
36
+ - ".gitignore"
40
37
  - Gemfile
41
38
  - README.md
42
39
  - Rakefile
@@ -46,31 +43,31 @@ files:
46
43
  - lib/aozoragen.rb
47
44
  - lib/aozoragen/renzaburo.rb
48
45
  - lib/aozoragen/sai-zen-sen.rb
46
+ - lib/aozoragen/syosetu.rb
49
47
  - lib/aozoragen/util.rb
50
48
  - lib/aozoragen/version.rb
51
49
  - lib/aozoragen/webmysteries.rb
52
50
  homepage: https://github.com/tdtds/aozoragen
53
51
  licenses: []
52
+ metadata: {}
54
53
  post_install_message:
55
54
  rdoc_options: []
56
55
  require_paths:
57
56
  - lib
58
57
  required_ruby_version: !ruby/object:Gem::Requirement
59
- none: false
60
58
  requirements:
61
- - - ! '>='
59
+ - - ">="
62
60
  - !ruby/object:Gem::Version
63
61
  version: '0'
64
62
  required_rubygems_version: !ruby/object:Gem::Requirement
65
- none: false
66
63
  requirements:
67
- - - ! '>='
64
+ - - ">="
68
65
  - !ruby/object:Gem::Version
69
66
  version: '0'
70
67
  requirements: []
71
68
  rubyforge_project:
72
- rubygems_version: 1.8.23
69
+ rubygems_version: 2.2.2
73
70
  signing_key:
74
- specification_version: 3
71
+ specification_version: 4
75
72
  summary: Generating AOZORA format text of eBook novels via some Web sites.
76
73
  test_files: []