aozoragen 0.2.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +2 -0
- data/bin/aozoragen +2 -0
- data/lib/aozoragen/syosetu.rb +46 -0
- data/lib/aozoragen/util.rb +3 -3
- data/lib/aozoragen/version.rb +1 -1
- metadata +11 -14
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 531310930df10ff69b5c5804afc7f935f3014646
|
4
|
+
data.tar.gz: 2370bb692b45d2ecde94c9a7c62913a0d748376d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f626776afd09a2897f8d124ef6bcd60f393fc4dd4a68ee5f1a9295be9ae54b8f85094c1d4577e8c00ba0600ca6f2c5e686ae7b097aadee25fe5332279d67a3d2
|
7
|
+
data.tar.gz: cf3b020e6d721f465c060d53d391cef74bbb00d670f24ff9309ed216d777061d953a28614aed6d62382e8eaf6325b54700559780daa948a8f3f45689fd53f92f
|
data/README.md
CHANGED
@@ -21,6 +21,8 @@ Gemを使ってインストールする:
|
|
21
21
|
* 実行時に無償公開中の章のみが抽出される。
|
22
22
|
* レンザブロー <http://renzaburo.jp/>
|
23
23
|
* 指定例: http://renzaburo.jp/contents_t/061-katano/index.html
|
24
|
+
* 小説を読もう! <http://yomou.syosetu.com/>
|
25
|
+
* 指定例: http://ncode.syosetu.com/n8725k/
|
24
26
|
* Webミステリーズ! <http://www.webmysteries.jp/>
|
25
27
|
* Webミステリーズ!の掲載作品には目次ページがないため、GitHubのWikiで代用する。
|
26
28
|
* h1要素に書名、h2要素に続くリストで著者名、h3要素に続くリストで連載各回のURLを表現する。
|
data/bin/aozoragen
CHANGED
data/lib/aozoragen/syosetu.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# scraping yomou.syosetu.com
|
4
|
+
#
|
5
|
+
require 'aozoragen/util'
|
6
|
+
require 'open-uri'
|
7
|
+
require 'pathname'
|
8
|
+
|
9
|
+
module Aozoragen
|
10
|
+
class Syosetu
|
11
|
+
include Util
|
12
|
+
|
13
|
+
def initialize(index_uri)
|
14
|
+
@index_uri = index_uri
|
15
|
+
@index_html = Nokogiri(open(@index_uri, 'r:utf-8', &:read))
|
16
|
+
end
|
17
|
+
|
18
|
+
def metainfo
|
19
|
+
info = {:id => Pathname(@index_uri.path).basename.to_s, :author => []}
|
20
|
+
info[:title] = (@index_html / 'title')[0].text
|
21
|
+
info[:author] << (@index_html / '.novel_writername a')[0].text
|
22
|
+
info
|
23
|
+
end
|
24
|
+
|
25
|
+
def each_chapter
|
26
|
+
(@index_html / '.subtitle a').each do |a|
|
27
|
+
uri = @index_uri + a.attr('href')
|
28
|
+
|
29
|
+
chapter = Nokogiri(open(uri, 'r:utf-8', &:read))
|
30
|
+
text = get_chapter_text(chapter)
|
31
|
+
chapter_id = '%03d' % Pathname(uri.path).basename.to_s.to_i
|
32
|
+
yield({id: chapter_id, uri: uri, text: text})
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def get_chapter_text(chapter)
|
37
|
+
text = ''
|
38
|
+
text << (chapter / '.novel_subtitle')[0].text.subhead
|
39
|
+
(chapter / '#novel_honbun').each do |page|
|
40
|
+
text << detag(page).gsub(/\n\n/, "\n")
|
41
|
+
text << "[#改ページ]\n"
|
42
|
+
end
|
43
|
+
text.han2zen.for_tategaki
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
data/lib/aozoragen/util.rb
CHANGED
@@ -20,7 +20,7 @@ class String
|
|
20
20
|
# replace characters fitting to vertical layout
|
21
21
|
#
|
22
22
|
def for_tategaki
|
23
|
-
self.tr( '
|
23
|
+
self.tr( '<>-“”−', '∧∨―〃〃‐' ).han2zen
|
24
24
|
end
|
25
25
|
|
26
26
|
##
|
@@ -57,9 +57,9 @@ module Aozoragen
|
|
57
57
|
ruby.inner_html = '|' + ruby.inner_html
|
58
58
|
(ruby / 'rp').each do |rp|
|
59
59
|
case rp.text
|
60
|
-
when
|
60
|
+
when /[(\(]/
|
61
61
|
rp.inner_html = '《'
|
62
|
-
when
|
62
|
+
when /[)\)]/
|
63
63
|
rp.inner_html = '》'
|
64
64
|
end
|
65
65
|
end
|
data/lib/aozoragen/version.rb
CHANGED
metadata
CHANGED
@@ -1,30 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aozoragen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- TADA Tadashi
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-09-07 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: nokogiri
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - ">="
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '0'
|
30
27
|
description: Scraping some Ebook web site and generating AOZORA format text files.
|
@@ -36,7 +33,7 @@ executables:
|
|
36
33
|
extensions: []
|
37
34
|
extra_rdoc_files: []
|
38
35
|
files:
|
39
|
-
- .gitignore
|
36
|
+
- ".gitignore"
|
40
37
|
- Gemfile
|
41
38
|
- README.md
|
42
39
|
- Rakefile
|
@@ -46,31 +43,31 @@ files:
|
|
46
43
|
- lib/aozoragen.rb
|
47
44
|
- lib/aozoragen/renzaburo.rb
|
48
45
|
- lib/aozoragen/sai-zen-sen.rb
|
46
|
+
- lib/aozoragen/syosetu.rb
|
49
47
|
- lib/aozoragen/util.rb
|
50
48
|
- lib/aozoragen/version.rb
|
51
49
|
- lib/aozoragen/webmysteries.rb
|
52
50
|
homepage: https://github.com/tdtds/aozoragen
|
53
51
|
licenses: []
|
52
|
+
metadata: {}
|
54
53
|
post_install_message:
|
55
54
|
rdoc_options: []
|
56
55
|
require_paths:
|
57
56
|
- lib
|
58
57
|
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
-
none: false
|
60
58
|
requirements:
|
61
|
-
- -
|
59
|
+
- - ">="
|
62
60
|
- !ruby/object:Gem::Version
|
63
61
|
version: '0'
|
64
62
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
63
|
requirements:
|
67
|
-
- -
|
64
|
+
- - ">="
|
68
65
|
- !ruby/object:Gem::Version
|
69
66
|
version: '0'
|
70
67
|
requirements: []
|
71
68
|
rubyforge_project:
|
72
|
-
rubygems_version:
|
69
|
+
rubygems_version: 2.2.2
|
73
70
|
signing_key:
|
74
|
-
specification_version:
|
71
|
+
specification_version: 4
|
75
72
|
summary: Generating AOZORA format text of eBook novels via some Web sites.
|
76
73
|
test_files: []
|