aozoragen 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ *.txt
6
+ *.pdf
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in aozoragen.gemspec
4
+ gemspec
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/aozoragen.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "aozoragen/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "aozoragen"
7
+ s.version = Aozoragen::VERSION
8
+ s.authors = ["TADA Tadashi"]
9
+ s.email = ["t@tdtds.jp"]
10
+ s.homepage = ""
11
+ s.summary = "Generating AOZORA format text of eBook novels via some Web sites."
12
+ s.description = "Scraping some Ebook web site and generating AOZORA format text files."
13
+
14
+ s.rubyforge_project = "aozoragen"
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+
21
+ # specify any dependencies here; for example:
22
+ # s.add_development_dependency "rspec"
23
+ s.add_runtime_dependency "nokogiri"
24
+ end
data/bin/aozora2pdf ADDED
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8; -*-
3
+ #
4
+ # aozora2pdf: converting AOZORA format text to PDF using Aozora-Kindle service.
5
+ # see: http://a2k.aill.org/
6
+ #
7
+ # sample usage:
8
+ # % aozora2pdf hoge*.txt > hoge.pdf
9
+ #
10
+ # Copyright (C) 2012 by TADA Tadashi <t@tdtds.jp>
11
+ # Distributed under GPL
12
+ #
13
+ require 'uri'
14
+ require 'net/http'
15
+ require 'cgi'
16
+
17
+ uri = URI( 'http://a2k.aill.org/download.cgi' )
18
+ text = ARGF.read.force_encoding( 'UTF-8' )
19
+ res = Net::HTTP.post_form( uri, 's' => 's', 'text' => text )
20
+ print res.body
data/bin/aozoragen ADDED
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8; -*-
3
+ #
4
+ # aozoragen: generating AOZORA format text files by scraping some ebook sites.
5
+ #
6
+ # Copyright (C) 2012 by TADA Tadashi <t@tdtds.jp>
7
+ # Distributed under GPL
8
+ #
9
+ require 'uri'
10
+
11
+ ARGV.each do |u|
12
+ uri = URI( u )
13
+ book = nil
14
+ case uri.host
15
+ when 'sai-zen-sen.jp'
16
+ require 'aozoragen/sai-zen-sen'
17
+ book = SaiZenSen::new( uri )
18
+ else
19
+ p "Error: unknown URI: #{uri}"
20
+ end
21
+ next unless book
22
+
23
+ meta = book.metainfo
24
+ open( "#{meta[:id]}.00.txt", 'w' ) do |w|
25
+ w.puts "#{meta[:title]}\n#{meta[:author].join ' / '}\n\n\n[#改ページ]"
26
+ end
27
+
28
+ book.each_chapter do |chapter|
29
+ open( "#{meta[:id]}.#{chapter[:id]}.txt", 'w' ) do |w|
30
+ w.puts chapter[:text]
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,64 @@
1
+ # -*- coding: utf-8; -*-
2
+ #
3
+ # scraping sai-zen-sen.jp
4
+ #
5
+ require 'nokogiri'
6
+ require 'open-uri'
7
+ require 'pathname'
8
+
9
+ class SaiZenSen
10
+ def initialize( index_uri )
11
+ @index_uri = index_uri
12
+ @index_html = Nokogiri( open( @index_uri, 'r:utf-8', &:read ) )
13
+ end
14
+
15
+ def metainfo
16
+ info = {:id => Pathname( @index_uri.path ).basename.to_s}
17
+ info[:title] = (@index_html / '#page-content-heading h1')[0].text
18
+ (@index_html / '#authors h3').each do |author|
19
+ info[:author] = (info[:author] || [] ) << author.text.sub( /.*? /, '' )
20
+ end
21
+ info
22
+ end
23
+
24
+ def each_chapter
25
+ (@index_html / '#back-numbers li a').each do |a|
26
+ uri = @index_uri + a.attr('href')
27
+ chapter = Nokogiri( open( uri, 'r:utf-8', &:read ) )
28
+ text = ''
29
+ (chapter / 'section.book-page-spread').each do |page|
30
+ page./( 'hgroup', 'div.pgroup' ).each do |section|
31
+ case section.name
32
+ when 'hgroup'
33
+ text << "\n     #{detag section}\n\n"
34
+ when 'div'
35
+ if section.attr( 'class' ) =~ /delimiter/
36
+ text << ' ' * 5 << '─' * 10 << "\n"
37
+ else
38
+ section.children.each do |paragraph|
39
+ next unless paragraph.name == 'p'
40
+ text << " #{detag paragraph}\n"
41
+ end
42
+ end
43
+ text << "\n"
44
+ end
45
+ end
46
+ text << "[#改ページ]\n"
47
+ end
48
+
49
+ yield( {id: Pathname( uri.path ).dirname.basename.to_s, uri: uri, text: text} )
50
+ end
51
+ end
52
+
53
+ def detag( elem )
54
+ (elem / 'ruby rp').each do |rp|
55
+ case rp.text
56
+ when '('
57
+ rp.inner_html = '《'
58
+ when ')'
59
+ rp.inner_html = '》'
60
+ end
61
+ end
62
+ elem.to_html.gsub( /<.*?>/, '' ).strip
63
+ end
64
+ end
@@ -0,0 +1,3 @@
1
+ module Aozoragen
2
+ VERSION = "0.0.1"
3
+ end
data/lib/aozoragen.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "aozoragen/version"
2
+
3
+ module Aozoragen
4
+ # Your code goes here...
5
+ end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: aozoragen
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - TADA Tadashi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-23 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: &71959020 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *71959020
25
+ description: Scraping some Ebook web site and generating AOZORA format text files.
26
+ email:
27
+ - t@tdtds.jp
28
+ executables:
29
+ - aozora2pdf
30
+ - aozoragen
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - .gitignore
35
+ - Gemfile
36
+ - Rakefile
37
+ - aozoragen.gemspec
38
+ - bin/aozora2pdf
39
+ - bin/aozoragen
40
+ - lib/aozoragen.rb
41
+ - lib/aozoragen/sai-zen-sen.rb
42
+ - lib/aozoragen/version.rb
43
+ homepage: ''
44
+ licenses: []
45
+ post_install_message:
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubyforge_project: aozoragen
63
+ rubygems_version: 1.8.11
64
+ signing_key:
65
+ specification_version: 3
66
+ summary: Generating AOZORA format text of eBook novels via some Web sites.
67
+ test_files: []