aozoragen 0.2.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 531310930df10ff69b5c5804afc7f935f3014646
4
+ data.tar.gz: 2370bb692b45d2ecde94c9a7c62913a0d748376d
5
+ SHA512:
6
+ metadata.gz: f626776afd09a2897f8d124ef6bcd60f393fc4dd4a68ee5f1a9295be9ae54b8f85094c1d4577e8c00ba0600ca6f2c5e686ae7b097aadee25fe5332279d67a3d2
7
+ data.tar.gz: cf3b020e6d721f465c060d53d391cef74bbb00d670f24ff9309ed216d777061d953a28614aed6d62382e8eaf6325b54700559780daa948a8f3f45689fd53f92f
data/README.md CHANGED
@@ -21,6 +21,8 @@ Gemを使ってインストールする:
21
21
  * 実行時に無償公開中の章のみが抽出される。
22
22
  * レンザブロー <http://renzaburo.jp/>
23
23
  * 指定例: http://renzaburo.jp/contents_t/061-katano/index.html
24
+ * 小説を読もう! <http://yomou.syosetu.com/>
25
+ * 指定例: http://ncode.syosetu.com/n8725k/
24
26
  * Webミステリーズ! <http://www.webmysteries.jp/>
25
27
  * Webミステリーズ!の掲載作品には目次ページがないため、GitHubのWikiで代用する。
26
28
  * h1要素に書名、h2要素に続くリストで著者名、h3要素に続くリストで連載各回のURLを表現する。
@@ -46,6 +46,8 @@ ARGV.each do |u|
46
46
  'sai-zen-sen'
47
47
  when 'renzaburo.jp'
48
48
  'renzaburo'
49
+ when 'ncode.syosetu.com'
50
+ 'syosetu'
49
51
  when 'github.com'
50
52
  case Pathname( uri.path ).basename.to_s.sub( %r|(.*?)-.*$|, '\1' )
51
53
  when 'webmysteries'
@@ -0,0 +1,46 @@
1
+ # -*- coding: utf-8; -*-
2
+ #
3
+ # scraping yomou.syosetu.com
4
+ #
5
+ require 'aozoragen/util'
6
+ require 'open-uri'
7
+ require 'pathname'
8
+
9
+ module Aozoragen
10
+ class Syosetu
11
+ include Util
12
+
13
+ def initialize(index_uri)
14
+ @index_uri = index_uri
15
+ @index_html = Nokogiri(open(@index_uri, 'r:utf-8', &:read))
16
+ end
17
+
18
+ def metainfo
19
+ info = {:id => Pathname(@index_uri.path).basename.to_s, :author => []}
20
+ info[:title] = (@index_html / 'title')[0].text
21
+ info[:author] << (@index_html / '.novel_writername a')[0].text
22
+ info
23
+ end
24
+
25
+ def each_chapter
26
+ (@index_html / '.subtitle a').each do |a|
27
+ uri = @index_uri + a.attr('href')
28
+
29
+ chapter = Nokogiri(open(uri, 'r:utf-8', &:read))
30
+ text = get_chapter_text(chapter)
31
+ chapter_id = '%03d' % Pathname(uri.path).basename.to_s.to_i
32
+ yield({id: chapter_id, uri: uri, text: text})
33
+ end
34
+ end
35
+
36
+ def get_chapter_text(chapter)
37
+ text = ''
38
+ text << (chapter / '.novel_subtitle')[0].text.subhead
39
+ (chapter / '#novel_honbun').each do |page|
40
+ text << detag(page).gsub(/\n\n/, "\n")
41
+ text << "[#改ページ]\n"
42
+ end
43
+ text.han2zen.for_tategaki
44
+ end
45
+ end
46
+ end
@@ -20,7 +20,7 @@ class String
20
20
  # replace characters fitting to vertical layout
21
21
  #
22
22
  def for_tategaki
23
- self.tr( '<>−', '∧∨‐' ).han2zen
23
+ self.tr( '<>-“”−', '∧∨―〃〃‐' ).han2zen
24
24
  end
25
25
 
26
26
  ##
@@ -57,9 +57,9 @@ module Aozoragen
57
57
  ruby.inner_html = '|' + ruby.inner_html
58
58
  (ruby / 'rp').each do |rp|
59
59
  case rp.text
60
- when '('
60
+ when /[(\(]/
61
61
  rp.inner_html = '《'
62
- when ')'
62
+ when /[)\)]/
63
63
  rp.inner_html = '》'
64
64
  end
65
65
  end
@@ -1,3 +1,3 @@
1
1
  module Aozoragen
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.1"
3
3
  end
metadata CHANGED
@@ -1,30 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aozoragen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
5
- prerelease:
4
+ version: 0.3.1
6
5
  platform: ruby
7
6
  authors:
8
7
  - TADA Tadashi
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-05-28 00:00:00.000000000 Z
11
+ date: 2014-09-07 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: nokogiri
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - ">="
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - ">="
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0'
30
27
  description: Scraping some Ebook web site and generating AOZORA format text files.
@@ -36,7 +33,7 @@ executables:
36
33
  extensions: []
37
34
  extra_rdoc_files: []
38
35
  files:
39
- - .gitignore
36
+ - ".gitignore"
40
37
  - Gemfile
41
38
  - README.md
42
39
  - Rakefile
@@ -46,31 +43,31 @@ files:
46
43
  - lib/aozoragen.rb
47
44
  - lib/aozoragen/renzaburo.rb
48
45
  - lib/aozoragen/sai-zen-sen.rb
46
+ - lib/aozoragen/syosetu.rb
49
47
  - lib/aozoragen/util.rb
50
48
  - lib/aozoragen/version.rb
51
49
  - lib/aozoragen/webmysteries.rb
52
50
  homepage: https://github.com/tdtds/aozoragen
53
51
  licenses: []
52
+ metadata: {}
54
53
  post_install_message:
55
54
  rdoc_options: []
56
55
  require_paths:
57
56
  - lib
58
57
  required_ruby_version: !ruby/object:Gem::Requirement
59
- none: false
60
58
  requirements:
61
- - - ! '>='
59
+ - - ">="
62
60
  - !ruby/object:Gem::Version
63
61
  version: '0'
64
62
  required_rubygems_version: !ruby/object:Gem::Requirement
65
- none: false
66
63
  requirements:
67
- - - ! '>='
64
+ - - ">="
68
65
  - !ruby/object:Gem::Version
69
66
  version: '0'
70
67
  requirements: []
71
68
  rubyforge_project:
72
- rubygems_version: 1.8.23
69
+ rubygems_version: 2.2.2
73
70
  signing_key:
74
- specification_version: 3
71
+ specification_version: 4
75
72
  summary: Generating AOZORA format text of eBook novels via some Web sites.
76
73
  test_files: []