aozoragen 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +2 -0
- data/bin/aozoragen +2 -0
- data/lib/aozoragen/syosetu.rb +46 -0
- data/lib/aozoragen/util.rb +3 -3
- data/lib/aozoragen/version.rb +1 -1
- metadata +11 -14
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 531310930df10ff69b5c5804afc7f935f3014646
|
4
|
+
data.tar.gz: 2370bb692b45d2ecde94c9a7c62913a0d748376d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f626776afd09a2897f8d124ef6bcd60f393fc4dd4a68ee5f1a9295be9ae54b8f85094c1d4577e8c00ba0600ca6f2c5e686ae7b097aadee25fe5332279d67a3d2
|
7
|
+
data.tar.gz: cf3b020e6d721f465c060d53d391cef74bbb00d670f24ff9309ed216d777061d953a28614aed6d62382e8eaf6325b54700559780daa948a8f3f45689fd53f92f
|
data/README.md
CHANGED
@@ -21,6 +21,8 @@ Gemを使ってインストールする:
|
|
21
21
|
* 実行時に無償公開中の章のみが抽出される。
|
22
22
|
* レンザブロー <http://renzaburo.jp/>
|
23
23
|
* 指定例: http://renzaburo.jp/contents_t/061-katano/index.html
|
24
|
+
* 小説を読もう! <http://yomou.syosetu.com/>
|
25
|
+
* 指定例: http://ncode.syosetu.com/n8725k/
|
24
26
|
* Webミステリーズ! <http://www.webmysteries.jp/>
|
25
27
|
* Webミステリーズ!の掲載作品には目次ページがないため、GitHubのWikiで代用する。
|
26
28
|
* h1要素に書名、h2要素に続くリストで著者名、h3要素に続くリストで連載各回のURLを表現する。
|
data/bin/aozoragen
CHANGED
data/lib/aozoragen/syosetu.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# scraping yomou.syosetu.com
|
4
|
+
#
|
5
|
+
require 'aozoragen/util'
|
6
|
+
require 'open-uri'
|
7
|
+
require 'pathname'
|
8
|
+
|
9
|
+
module Aozoragen
|
10
|
+
class Syosetu
|
11
|
+
include Util
|
12
|
+
|
13
|
+
def initialize(index_uri)
|
14
|
+
@index_uri = index_uri
|
15
|
+
@index_html = Nokogiri(open(@index_uri, 'r:utf-8', &:read))
|
16
|
+
end
|
17
|
+
|
18
|
+
def metainfo
|
19
|
+
info = {:id => Pathname(@index_uri.path).basename.to_s, :author => []}
|
20
|
+
info[:title] = (@index_html / 'title')[0].text
|
21
|
+
info[:author] << (@index_html / '.novel_writername a')[0].text
|
22
|
+
info
|
23
|
+
end
|
24
|
+
|
25
|
+
def each_chapter
|
26
|
+
(@index_html / '.subtitle a').each do |a|
|
27
|
+
uri = @index_uri + a.attr('href')
|
28
|
+
|
29
|
+
chapter = Nokogiri(open(uri, 'r:utf-8', &:read))
|
30
|
+
text = get_chapter_text(chapter)
|
31
|
+
chapter_id = '%03d' % Pathname(uri.path).basename.to_s.to_i
|
32
|
+
yield({id: chapter_id, uri: uri, text: text})
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def get_chapter_text(chapter)
|
37
|
+
text = ''
|
38
|
+
text << (chapter / '.novel_subtitle')[0].text.subhead
|
39
|
+
(chapter / '#novel_honbun').each do |page|
|
40
|
+
text << detag(page).gsub(/\n\n/, "\n")
|
41
|
+
text << "[#改ページ]\n"
|
42
|
+
end
|
43
|
+
text.han2zen.for_tategaki
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
data/lib/aozoragen/util.rb
CHANGED
@@ -20,7 +20,7 @@ class String
|
|
20
20
|
# replace characters fitting to vertical layout
|
21
21
|
#
|
22
22
|
def for_tategaki
|
23
|
-
self.tr( '
|
23
|
+
self.tr( '<>-“”−', '∧∨―〃〃‐' ).han2zen
|
24
24
|
end
|
25
25
|
|
26
26
|
##
|
@@ -57,9 +57,9 @@ module Aozoragen
|
|
57
57
|
ruby.inner_html = '|' + ruby.inner_html
|
58
58
|
(ruby / 'rp').each do |rp|
|
59
59
|
case rp.text
|
60
|
-
when
|
60
|
+
when /[(\(]/
|
61
61
|
rp.inner_html = '《'
|
62
|
-
when
|
62
|
+
when /[)\)]/
|
63
63
|
rp.inner_html = '》'
|
64
64
|
end
|
65
65
|
end
|
data/lib/aozoragen/version.rb
CHANGED
metadata
CHANGED
@@ -1,30 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aozoragen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- TADA Tadashi
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-09-07 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: nokogiri
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - ">="
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '0'
|
30
27
|
description: Scraping some Ebook web site and generating AOZORA format text files.
|
@@ -36,7 +33,7 @@ executables:
|
|
36
33
|
extensions: []
|
37
34
|
extra_rdoc_files: []
|
38
35
|
files:
|
39
|
-
- .gitignore
|
36
|
+
- ".gitignore"
|
40
37
|
- Gemfile
|
41
38
|
- README.md
|
42
39
|
- Rakefile
|
@@ -46,31 +43,31 @@ files:
|
|
46
43
|
- lib/aozoragen.rb
|
47
44
|
- lib/aozoragen/renzaburo.rb
|
48
45
|
- lib/aozoragen/sai-zen-sen.rb
|
46
|
+
- lib/aozoragen/syosetu.rb
|
49
47
|
- lib/aozoragen/util.rb
|
50
48
|
- lib/aozoragen/version.rb
|
51
49
|
- lib/aozoragen/webmysteries.rb
|
52
50
|
homepage: https://github.com/tdtds/aozoragen
|
53
51
|
licenses: []
|
52
|
+
metadata: {}
|
54
53
|
post_install_message:
|
55
54
|
rdoc_options: []
|
56
55
|
require_paths:
|
57
56
|
- lib
|
58
57
|
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
-
none: false
|
60
58
|
requirements:
|
61
|
-
- -
|
59
|
+
- - ">="
|
62
60
|
- !ruby/object:Gem::Version
|
63
61
|
version: '0'
|
64
62
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
63
|
requirements:
|
67
|
-
- -
|
64
|
+
- - ">="
|
68
65
|
- !ruby/object:Gem::Version
|
69
66
|
version: '0'
|
70
67
|
requirements: []
|
71
68
|
rubyforge_project:
|
72
|
-
rubygems_version:
|
69
|
+
rubygems_version: 2.2.2
|
73
70
|
signing_key:
|
74
|
-
specification_version:
|
71
|
+
specification_version: 4
|
75
72
|
summary: Generating AOZORA format text of eBook novels via some Web sites.
|
76
73
|
test_files: []
|