aozoragen 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ *.txt
6
+ *.pdf
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; the plain-http endpoint lets a network attacker tamper
# with downloaded packages.
source "https://rubygems.org"

# Specify your gem's dependencies in aozoragen.gemspec
gemspec
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/aozoragen.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "aozoragen/version"
4
+
5
# Gem specification for aozoragen: declares the package metadata, the files
# shipped in the gem (taken from the git index) and its runtime dependencies.
Gem::Specification.new do |s|
  s.name        = "aozoragen"
  s.version     = Aozoragen::VERSION
  s.authors     = ["TADA Tadashi"]
  s.email       = ["t@tdtds.jp"]
  s.homepage    = ""
  s.summary     = "Generating AOZORA format text of eBook novels via some Web sites."
  s.description = "Scraping some Ebook web site and generating AOZORA format text files."

  # rubyforge_project is deprecated in newer RubyGems releases; only set it
  # while the writer is still available so the gemspec keeps loading if the
  # attribute is ever dropped.
  s.rubyforge_project = "aozoragen" if s.respond_to?( :rubyforge_project= )

  # Ask git for a NUL-delimited listing (-z): with the default newline-delimited
  # output git quotes/escapes unusual path names (spaces, quotes, non-ASCII),
  # which would then be recorded under the wrong name.
  s.files         = `git ls-files -z`.split("\x0")
  s.test_files    = `git ls-files -z -- {test,spec,features}/*`.split("\x0")
  s.executables   = `git ls-files -z -- bin/*`.split("\x0").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # specify any dependencies here; for example:
  # s.add_development_dependency "rspec"
  s.add_runtime_dependency "nokogiri"
end
data/bin/aozora2pdf ADDED
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8; -*-
3
+ #
4
+ # aozora2pdf: converting AOZORA format text to PDF using Aozora-Kindle service.
5
+ # see: http://a2k.aill.org/
6
+ #
7
+ # sample usage:
8
+ # % aozora2pdf hoge*.txt > hoge.pdf
9
+ #
10
+ # Copyright (C) 2012 by TADA Tadashi <t@tdtds.jp>
11
+ # Distributed under GPL
12
+ #
13
+ require 'uri'
14
+ require 'net/http'
15
+ require 'cgi'
16
+
17
# Read all input (files named on the command line, or stdin) as UTF-8 text,
# post it to the Aozora-Kindle conversion service and write the returned
# document to stdout.
uri = URI( 'http://a2k.aill.org/download.cgi' )
text = ARGF.read.force_encoding( 'UTF-8' )
res = Net::HTTP.post_form( uri, 's' => 's', 'text' => text )

# Without this check an HTTP error page would silently be written out as if
# it were the converted document.
unless res.is_a?( Net::HTTPSuccess )
	abort "aozora2pdf: conversion failed: #{res.code} #{res.message}"
end

# The response body is binary data; keep stdout from applying any newline or
# encoding conversion to it.
$stdout.binmode
print res.body
data/bin/aozoragen ADDED
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8; -*-
3
+ #
4
+ # aozoragen: generating AOZORA format text files by scraping some ebook sites.
5
+ #
6
+ # Copyright (C) 2012 by TADA Tadashi <t@tdtds.jp>
7
+ # Distributed under GPL
8
+ #
9
+ require 'uri'
10
+
11
# For every URL given on the command line, pick the scraper that matches the
# host, then write "<id>.00.txt" (title page) and one "<id>.<chapter>.txt"
# file per chapter into the current directory. Unsupported or malformed URLs
# are reported on stderr and skipped so the remaining arguments still run.
ARGV.each do |u|
	begin
		uri = URI( u )
	rescue URI::InvalidURIError => e
		warn "Error: invalid URI: #{u} (#{e.message})"
		next
	end

	book = case uri.host
	when 'sai-zen-sen.jp'
		# loaded lazily so only the scraper that is actually needed gets required
		require 'aozoragen/sai-zen-sen'
		SaiZenSen.new( uri )
	else
		warn "Error: unknown URI: #{uri}"
		nil
	end
	next unless book

	meta = book.metainfo
	# metainfo may carry no :author entry when the page lists no authors
	authors = Array( meta[:author] ).join( ' / ' )

	# File.open instead of Kernel#open: the file name is derived from the URL,
	# and Kernel#open would run a command for a name starting with "|".
	File.open( "#{meta[:id]}.00.txt", 'w' ) do |w|
		w.puts "#{meta[:title]}\n#{authors}\n\n\n[#改ページ]"
	end

	book.each_chapter do |chapter|
		File.open( "#{meta[:id]}.#{chapter[:id]}.txt", 'w' ) do |w|
			w.puts chapter[:text]
		end
	end
end
@@ -0,0 +1,64 @@
1
+ # -*- coding: utf-8; -*-
2
+ #
3
+ # scraping sai-zen-sen.jp
4
+ #
5
+ require 'nokogiri'
6
+ require 'open-uri'
7
+ require 'pathname'
8
+
9
# Scraper for a work published on sai-zen-sen.jp.
#
# The index page of the work is downloaded when the object is created; the
# instance then exposes the work's metadata (#metainfo) and the text of each
# back-number chapter converted to AOZORA-format markup (#each_chapter).
class SaiZenSen
  # Maps the fallback parentheses found in <rp> elements to the brackets that
  # #detag writes around ruby readings.
  RUBY_BRACKETS = { '(' => '《', ')' => '》' }.freeze

  # index_uri:: URI (or URI string) of the work's index page.
  def initialize( index_uri )
    @index_uri = URI( index_uri )
    @index_html = fetch( @index_uri )
  end

  # Returns a Hash describing the work:
  # :id:: last component of the index URI path
  # :title:: text of the page heading, or nil when the heading is missing
  # :author:: Array of author names (empty when none are listed), each with
  #           everything up to the first space of the "#authors h3" text removed
  def metainfo
    heading = (@index_html / '#page-content-heading h1').first
    authors = (@index_html / '#authors h3').map do |author|
      author.text.sub( /.*? /, '' )
    end

    {
      :id => Pathname( @index_uri.path ).basename.to_s,
      :title => heading && heading.text,
      :author => authors
    }
  end

  # Downloads every chapter linked from the "#back-numbers" list and yields a
  # Hash {id:, uri:, text:} per chapter, where :id is the name of the directory
  # containing the chapter page and :text is the converted chapter body.
  # Returns an Enumerator when called without a block.
  def each_chapter
    return enum_for( :each_chapter ) unless block_given?

    (@index_html / '#back-numbers li a').each do |a|
      href = a.attr( 'href' )
      next unless href # an anchor without a target cannot be resolved

      uri = @index_uri + href
      text = chapter_text( fetch( uri ) )
      yield( {id: Pathname( uri.path ).dirname.basename.to_s, uri: uri, text: text} )
    end
  end

  # Returns the text of +elem+ with all markup removed and surrounding
  # whitespace stripped. The <rp> parentheses of ruby annotations are first
  # rewritten (in place, on the given element) to 《 and 》.
  def detag( elem )
    (elem / 'ruby rp').each do |rp|
      bracket = RUBY_BRACKETS[rp.text]
      rp.inner_html = bracket if bracket
    end
    # [^>]* rather than .*? so that a tag containing a line break is removed too
    elem.to_html.gsub( /<[^>]*>/, '' ).strip
  end

  private

  # Downloads +uri+, reading the body as UTF-8, and parses it with Nokogiri.
  # Uses the URI's own #open (provided by open-uri) because Kernel#open no
  # longer accepts URIs on current Ruby versions.
  def fetch( uri )
    Nokogiri( uri.open( 'r:utf-8', &:read ) )
  end

  # Converts one parsed chapter document to text: every page spread is
  # rendered section by section and terminated with a page-break marker.
  def chapter_text( chapter )
    text = ''
    (chapter / 'section.book-page-spread').each do |page|
      page.search( 'hgroup', 'div.pgroup' ).each do |section|
        append_section( text, section )
      end
      text << "[#改ページ]\n"
    end
    text
  end

  # Appends the rendering of a single <hgroup> (heading) or <div> (paragraph
  # group or delimiter) to +text+; any other element is ignored.
  def append_section( text, section )
    case section.name
    when 'hgroup'
      text << "\n     #{detag section}\n\n"
    when 'div'
      if section.attr( 'class' ) =~ /delimiter/
        text << ' ' * 5 << '─' * 10 << "\n"
      else
        section.children.each do |paragraph|
          next unless paragraph.name == 'p'
          text << " #{detag paragraph}\n"
        end
      end
      text << "\n"
    end
  end
end
@@ -0,0 +1,3 @@
1
module Aozoragen
  # Release version of the gem. Frozen so the shared constant cannot be
  # mutated in place by code that reads it.
  VERSION = "0.0.1".freeze
end
data/lib/aozoragen.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "aozoragen/version"
2
+
3
+ module Aozoragen
4
+ # Top-level namespace of the gem; currently it only carries VERSION (loaded from aozoragen/version above).
5
+ end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: aozoragen
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - TADA Tadashi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-23 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: &71959020 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *71959020
25
+ description: Scraping some Ebook web site and generating AOZORA format text files.
26
+ email:
27
+ - t@tdtds.jp
28
+ executables:
29
+ - aozora2pdf
30
+ - aozoragen
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - .gitignore
35
+ - Gemfile
36
+ - Rakefile
37
+ - aozoragen.gemspec
38
+ - bin/aozora2pdf
39
+ - bin/aozoragen
40
+ - lib/aozoragen.rb
41
+ - lib/aozoragen/sai-zen-sen.rb
42
+ - lib/aozoragen/version.rb
43
+ homepage: ''
44
+ licenses: []
45
+ post_install_message:
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubyforge_project: aozoragen
63
+ rubygems_version: 1.8.11
64
+ signing_key:
65
+ specification_version: 3
66
+ summary: Generating AOZORA format text of eBook novels via some Web sites.
67
+ test_files: []