audio_book_creator 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/.rspec +4 -0
- data/.travis.yml +8 -0
- data/Gemfile +8 -0
- data/README.md +60 -0
- data/Rakefile +8 -0
- data/audio_book_creator.gemspec +31 -0
- data/bin/audio_book_creator +6 -0
- data/lib/audio_book_creator.rb +59 -0
- data/lib/audio_book_creator/binder.rb +61 -0
- data/lib/audio_book_creator/book_creator.rb +31 -0
- data/lib/audio_book_creator/book_def.rb +36 -0
- data/lib/audio_book_creator/cached_hash.rb +20 -0
- data/lib/audio_book_creator/cascading_array.rb +57 -0
- data/lib/audio_book_creator/chapter.rb +33 -0
- data/lib/audio_book_creator/cli.rb +119 -0
- data/lib/audio_book_creator/conductor.rb +67 -0
- data/lib/audio_book_creator/editor.rb +20 -0
- data/lib/audio_book_creator/logging.rb +7 -0
- data/lib/audio_book_creator/page_db.rb +42 -0
- data/lib/audio_book_creator/page_def.rb +31 -0
- data/lib/audio_book_creator/runner.rb +22 -0
- data/lib/audio_book_creator/speaker.rb +54 -0
- data/lib/audio_book_creator/speaker_def.rb +39 -0
- data/lib/audio_book_creator/spider.rb +60 -0
- data/lib/audio_book_creator/spoken_chapter.rb +16 -0
- data/lib/audio_book_creator/surfer_def.rb +15 -0
- data/lib/audio_book_creator/url_filter.rb +33 -0
- data/lib/audio_book_creator/version.rb +3 -0
- data/lib/audio_book_creator/web.rb +44 -0
- data/spec/audio_book_creator/binder_spec.rb +103 -0
- data/spec/audio_book_creator/book_creator_spec.rb +63 -0
- data/spec/audio_book_creator/book_def_spec.rb +61 -0
- data/spec/audio_book_creator/cached_hash_spec.rb +19 -0
- data/spec/audio_book_creator/cascading_array_spec.rb +64 -0
- data/spec/audio_book_creator/chapter_spec.rb +80 -0
- data/spec/audio_book_creator/cli_spec.rb +274 -0
- data/spec/audio_book_creator/conductor_spec.rb +102 -0
- data/spec/audio_book_creator/editor_spec.rb +39 -0
- data/spec/audio_book_creator/logging_spec.rb +21 -0
- data/spec/audio_book_creator/page_db_spec.rb +74 -0
- data/spec/audio_book_creator/page_def_spec.rb +79 -0
- data/spec/audio_book_creator/runner_spec.rb +65 -0
- data/spec/audio_book_creator/speaker_def_spec.rb +39 -0
- data/spec/audio_book_creator/speaker_spec.rb +105 -0
- data/spec/audio_book_creator/spider_spec.rb +172 -0
- data/spec/audio_book_creator/spoken_chapter_spec.rb +30 -0
- data/spec/audio_book_creator/surfer_def_spec.rb +17 -0
- data/spec/audio_book_creator/url_filter_spec.rb +52 -0
- data/spec/audio_book_creator/version_spec.rb +5 -0
- data/spec/audio_book_creator/web_spec.rb +66 -0
- data/spec/audio_book_creator_spec.rb +25 -0
- data/spec/spec_helper.rb +106 -0
- data/spec/support/test_logger.rb +21 -0
- metadata +238 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
module AudioBookCreator
  # One chapter of a book: its position, its title and its text.
  class Chapter
    attr_accessor :number, :title, :body

    # @param options [Hash] attribute name => value; every pair is assigned
    #   through the matching public writer (number=, title=, body=), so an
    #   unknown key raises NoMethodError.
    def initialize(options = {})
      options.each { |n, v| public_send("#{n}=", v) }
      # body may be nil, a single value or a list of paragraphs; collapse it
      # into one string with a blank line between paragraphs
      @body = Array(@body).compact.join("\n\n")
    end

    # @return [String] base file name (no extension), e.g. "chapter03"
    def filename
      format("chapter%02d", number)
    end

    # @return [Boolean] true when the chapter has no body text
    def empty?
      body.empty?
    end

    # @return [Boolean] inverse of #empty?
    def present?
      !empty?
    end

    # @return [String] the title, a blank line, the body and a trailing newline
    def to_s
      "#{title}\n\n#{body}\n"
    end

    # Chapters are equal when number, title and body all match.
    def ==(other)
      other.kind_of?(Chapter) &&
        other.number == number &&
        other.title.eql?(title) && other.body.eql?(body)
    end
    alias :eql? :==

    # Hash code consistent with #eql?, so equal chapters collapse under
    # Array#uniq and address the same Hash slot.
    # NOTE(review): assumes numbers that compare == also hash alike
    # (i.e. chapter numbers are Integers) - confirm.
    def hash
      [number, title, body].hash
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'logger'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
module AudioBookCreator
  # Command line front end: parses ARGV into the four parameter objects
  # (page, book, speaker and surfer definitions) and hands them to a Conductor.
  class Cli
    include Logging

    def initialize
      self.verbose = false
      page_def.title_path = "h1"
      page_def.body_path = "p"
      page_def.link_path = "a"
    end

    # stub for testing
    attr_writer :web

    # Interpret the positional arguments left after option parsing.
    #
    # * no arguments: print a hint plus the usage text and exit with status 2
    # * first argument contains "://": every argument is a url and the title
    #   is the last path segment of the first url
    # * otherwise: the first argument is the title, the rest are urls
    #
    # NOTE(review): when only a title is given, urls ends up empty and
    # surfer_def.host is nil - confirm that downstream code copes.
    #
    # @param argv [Array<String>] remaining arguments (shifted in place when
    #   a title is present)
    # @param usage [String] usage text printed when argv is empty
    def set_args(argv, usage)
      if argv.empty?
        puts "please provide a url", usage
        exit 2
      elsif argv.first.include?("://")
        book_def.title = argv.first.split("/").last
        book_def.urls = argv
      else
        book_def.title = argv.shift
        book_def.urls = argv
      end
      surfer_def.cache_filename = database
      surfer_def.host = book_def.urls.first
    end

    # @return [String] file name of the page cache database
    def database
      "pages.db"
    end

    # @param val [Boolean] truthy logs at INFO, falsy at WARN
    def verbose=(val)
      logger.level = val ? Logger::INFO : Logger::WARN
    end

    # Parse the command line, filling in the parameter objects.
    #
    # @param argv [Array<String>] arguments; recognised options are removed
    # @return [Cli] self, so callers can chain #run
    def parse(argv)
      options = OptionParser.new do |opts|
        opts.program_name = "audio_book_creator"
        opts.version = VERSION
        opts.banner = "Usage: audio_book_creator [options] title url [url] [...]"
        opt(opts, self) do |o|
          o.opt(:verbose, "-v", "--verbose", "Run verbosely")
        end
        opt(opts, page_def) do |o|
          o.opt(:title_path, "--title STRING", "Title css (e.g.: h1)")
          o.opt(:body_path, "--body STRING", "Content css (e.g.: p)")
          o.opt(:link_path, "--link STRING", "Next Page css (e.g.: a.Next)")
          o.opt(:chapter_path, "--chapter STRING", "Next Chapter css")
        end
        opt(opts, surfer_def) do |o|
          o.opt(:max, "--no-max", "Don't limit the number of pages to visit")
          o.opt(:max, "--max NUMBER", Integer, "Maximum number of pages to visit (default: 10)")
          o.opt(:regen_html, "--force-html", "Regenerate the html")
        end
        opt(opts, speaker_def) do |o|
          o.opt(:regen_audio, "--force-audio", "Regenerate the audio")
          o.opt(:rate, "--rate NUMBER", Integer, "Set words per minute")
          o.opt(:voice, "--voice STRING", "Set speaker voice")
        end
        opt(opts, book_def) do |o|
          o.opt(:base_dir, "--base-dir STRING", "Directory to hold files")
          o.opt(:itunes, "--itunes", "-A", "Load book into itunes")
        end
      end
      options.parse!(argv)
      set_args(argv, options.to_s)
      self
    end

    # parameter objects

    def page_def
      @page_def ||= PageDef.new
    end

    def book_def
      @book_def ||= BookDef.new
    end

    def speaker_def
      @speaker_def ||= SpeakerDef.new
    end

    def surfer_def
      @surfer_def ||= SurferDef.new
    end

    # @return [Conductor] memoized conductor wired with the parameter objects
    def conductor
      @conductor ||= Conductor.new(page_def, book_def, speaker_def, surfer_def)
    end

    def run
      conductor.run
    end

    private

    # Yield an OptSetter that stores parsed option values on +model+.
    def opt(opts, model)
      yield OptSetter.new(opts, model)
    end

    # Binds an OptionParser to a model so that each declared option writes
    # its parsed value to an attribute of that model.
    class OptSetter
      def initialize(opts, model)
        @opts = opts
        @model = model
      end

      # @param value [Symbol] attribute on the model to assign
      # @param args [Array] passed straight to OptionParser#on
      def opt(value, *args)
        @opts.on(*args) { |v| @model.send("#{value}=", v) }
      end
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module AudioBookCreator
  # Builds, on demand, every collaborator needed to create a book from the
  # four parameter objects, and kicks off the creation.
  class Conductor
    attr_accessor :page_def, :book_def, :speaker_def, :surfer_def

    def initialize(page_def, book_def, speaker_def, surfer_def)
      @page_def, @book_def = page_def, book_def
      @speaker_def, @surfer_def = speaker_def, surfer_def
    end

    # --- collaborators used by the spider ---

    # @return [PageDb] page store backed by the surfer's cache file
    def page_cache
      @page_cache ||= PageDb.new(surfer_def.cache_filename)
    end

    # @return [Web] fetcher built with the surfer's page limit
    def web
      @web ||= Web.new(surfer_def.max)
    end

    # @return [CachedHash] the page cache layered over the web fetcher
    def cached_web
      @cached_hash ||= CachedHash.new(page_cache, web)
    end

    # @return [UrlFilter] filter built from the surfer's host
    def invalid_urls
      @invalid_urls ||= UrlFilter.new(surfer_def.host)
    end

    def spider
      @spider ||= Spider.new(page_def, cached_web, invalid_urls)
    end

    # --- remaining pipeline stages ---

    def editor
      @editor ||= Editor.new(page_def)
    end

    def speaker
      @speaker ||= Speaker.new(speaker_def, book_def)
    end

    def binder
      @binder ||= Binder.new(book_def, speaker_def)
    end

    # --- orchestration ---

    # @return [BookCreator] creator wired with spider, editor, speaker, binder
    def creator
      @creator ||= BookCreator.new(spider, editor, speaker, binder)
    end

    # @return the book definition's unique urls (memoized)
    def outstanding
      @outstanding ||= book_def.unique_urls
    end

    # Create the book from the outstanding urls.
    def run
      creator.create(outstanding)
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
module AudioBookCreator
  # Converts raw html pages into Chapter objects, using a page definition to
  # locate the title and body inside each page.
  class Editor
    attr_accessor :page_def

    def initialize(page_def)
      @page_def = page_def
    end

    # convert page[] -> chapter[]
    #
    # Chapters are numbered from 1 in page order; a page without a title is
    # called "Chapter <number>".
    def parse(pages)
      pages.map.with_index(1) do |html, position|
        document = Nokogiri::HTML(html)
        heading = page_def.title(document)
        heading ||= "Chapter #{position}"
        Chapter.new(number: position, title: heading, body: page_def.body(document))
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require "sqlite3"
|
2
|
+
|
3
|
+
module AudioBookCreator
  # Hash-like store of page contents keyed by name, kept in a sqlite file.
  # The database is opened (and the table created) on first use.
  class PageDb
    include Enumerable

    # this is for tests - get out of here
    attr_accessor :filename

    def initialize(filename)
      @filename = filename
    end

    # Store +value+ under +key+ by inserting a new row.
    def []=(key, value)
      db.execute "insert into pages (name, contents) values ( ?, ?)", [key, value]
    end

    # @return the contents of the first row stored under +key+, or nil
    def [](key)
      rows = db.execute("select contents from pages where name = ?", key)
      first_row = rows.first
      first_row.first if first_row
    end

    # @return [Boolean] whether a truthy value is stored under +key+
    def include?(key)
      self[key] ? true : false
    end

    # Pass every (name, contents) row, in insertion order, to the block.
    def each(&block)
      db.execute "select name, contents from pages order by rowid", &block
    end

    private

    # Lazily opened database handle.
    def db
      @db ||= create
    end

    # Open the database file and make sure the pages table exists.
    def create
      connection = SQLite3::Database.new(filename)
      connection.execute("create table if not exists pages (name text, contents blob)")
      connection
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module AudioBookCreator
  # information on the format of the html page that is read
  #
  # Each *_path attribute is a css selector that is handed to the document's
  # #css method.
  class PageDef
    attr_accessor :title_path, :body_path, :link_path, :chapter_path

    def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil)
      @title_path = title_path
      @body_path = body_path
      @link_path = link_path
      @chapter_path = chapter_path
    end

    # @param dom [#css] parsed document
    # @return [String, nil] text of the first node matching title_path, or
    #   nil when nothing matches
    def title(dom)
      title = dom.css(title_path).first
      title.text if title
    end

    # @param dom [#css] parsed document
    # @return the nodes matching body_path, exactly as returned by dom.css
    def body(dom)
      dom.css(body_path)
      # feels like I need .map { |n| n.text }
    end

    # Map the nodes matching link_path through the block.
    def page_links(dom, &block)
      links(dom, link_path, &block)
    end

    # Map the nodes matching chapter_path through the block. chapter_path is
    # nil unless configured, in which case there are no chapter links.
    def chapter_links(dom, &block)
      links(dom, chapter_path, &block)
    end

    private

    # Map the nodes matching +path+ through the block; a blank selector
    # matches nothing instead of being passed on to dom.css.
    def links(dom, path, &block)
      return [] if path.to_s.empty?
      dom.css(path).map(&block)
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# migrate to awesome spawn
|
2
|
+
module AudioBookCreator
  # Runs external commands through Kernel#system, logging each invocation
  # and its outcome at INFO level.
  class Runner
    include Logging

    # Run +cmd+ with the given parameters.
    #
    # @param cmd [String] program to execute
    # @param options [Hash] :params holds the arguments as an Array or a
    #   Hash of switch => value; nesting is flattened, nil entries are
    #   dropped, and a missing :params means "no arguments"
    # @return [Boolean, nil] the result of Kernel#system
    def run(cmd, options = {})
      # Array() turns nil into [] and a Hash into [switch, value] pairs
      params = Array(options[:params]).flatten.compact

      logger.info { "run: #{cmd} #{params.join(" ")}" }
      logger.info ""
      status = system(cmd, *params.map(&:to_s))
      logger.info ""
      logger.info { status ? "success" : "issue" }

      status
    end

    # Same as #run, but failure is an error.
    #
    # @raise [RuntimeError] when #run returns a falsy status
    def run!(cmd, options = {})
      run(cmd, options) || raise("trouble running command")
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# FileUtils is used below; load it here so this file does not depend on
# another file having required it first
require 'fileutils'

module AudioBookCreator
  # Turns chapters into audio files inside the book's base directory using
  # the `say` command.
  class Speaker
    attr_accessor :speaker_def
    attr_accessor :book_def

    def initialize(speaker_def, book_def)
      @speaker_def = speaker_def
      @book_def = book_def
    end

    # Make sure the base directory exists. mkdir_p creates missing parent
    # directories too and is a no-op when the directory is already there.
    def make_directory_structure
      FileUtils.mkdir_p(base_dir)
    end

    # Write the chapter text to disk and produce its sound file.
    #
    # The text and sound files are handed to AudioBookCreator.optionally_write
    # and AudioBookCreator.optionally_run together with the regen_audio flag.
    #
    # @param chapter [Chapter] chapter to speak
    # @return [SpokenChapter] the chapter title paired with the sound file name
    # @raise [RuntimeError] "Empty chapter" when the chapter has no body
    def say(chapter)
      raise "Empty chapter" if chapter.empty?
      text_filename = chapter_text_filename(chapter)
      sound_filename = chapter_sound_filename(chapter)

      AudioBookCreator.optionally_write(text_filename, force) { chapter.to_s }
      AudioBookCreator.optionally_run(sound_filename, force) do
        ["say", params: params(text_filename, sound_filename)]
      end
      SpokenChapter.new(chapter.title, sound_filename)
    end

    # @return [String] path of the chapter's text file
    def chapter_text_filename(chapter)
      "#{base_dir}/#{chapter.filename}.txt"
    end

    # @return [String] path of the chapter's sound file
    def chapter_sound_filename(chapter)
      "#{base_dir}/#{chapter.filename}.m4a"
    end

    private

    def base_dir
      book_def.base_dir
    end

    def force
      speaker_def.regen_audio
    end

    # Switches passed to `say`: voice, rate, input text file, output file.
    def params(text_filename, sound_filename)
      {
        "-v" => speaker_def.voice,
        "-r" => speaker_def.rate,
        "-f" => text_filename,
        "-o" => sound_filename,
      }
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module AudioBookCreator
  # Settings for speaking chapters and binding the resulting audio.
  class SpeakerDef
    # currently like the following voices:
    # Vicki     # 10
    # Serena    # 8 UK
    # Allison   # ? (ok)
    # Moira     # 7 Irish
    # Fiona     # 5 Scottish
    # Kate      # 4 UK
    # Susan     # 2
    # Zosia     # 0 Poland
    # Angelica  # 0 Mexican?
    # Paulina   # 0 Mexican
    attr_accessor :voice, :rate

    # max_hours: split on this hour mark
    attr_accessor :channels, :max_hours, :bit_rate, :sample_rate, :regen_audio

    # @param options [Hash] attribute name => value, assigned through the
    #   public writers; anything left unset (or falsy) gets its default
    def initialize(options = {})
      options.each do |name, value|
        public_send("#{name}=", value)
      end

      fallbacks = {
        # for speaking the chapter
        voice: "Vicki",
        rate: 280,
        # for binding the book
        channels: 1,
        bit_rate: 32,
        max_hours: 7,
        sample_rate: 22_050
      }
      fallbacks.each do |name, fallback|
        ivar = "@#{name}"
        instance_variable_set(ivar, fallback) unless instance_variable_get(ivar)
      end
    end
  end
end
|