audio_book_creator 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/.rspec +4 -0
- data/.travis.yml +8 -0
- data/Gemfile +8 -0
- data/README.md +60 -0
- data/Rakefile +8 -0
- data/audio_book_creator.gemspec +31 -0
- data/bin/audio_book_creator +6 -0
- data/lib/audio_book_creator.rb +59 -0
- data/lib/audio_book_creator/binder.rb +61 -0
- data/lib/audio_book_creator/book_creator.rb +31 -0
- data/lib/audio_book_creator/book_def.rb +36 -0
- data/lib/audio_book_creator/cached_hash.rb +20 -0
- data/lib/audio_book_creator/cascading_array.rb +57 -0
- data/lib/audio_book_creator/chapter.rb +33 -0
- data/lib/audio_book_creator/cli.rb +119 -0
- data/lib/audio_book_creator/conductor.rb +67 -0
- data/lib/audio_book_creator/editor.rb +20 -0
- data/lib/audio_book_creator/logging.rb +7 -0
- data/lib/audio_book_creator/page_db.rb +42 -0
- data/lib/audio_book_creator/page_def.rb +31 -0
- data/lib/audio_book_creator/runner.rb +22 -0
- data/lib/audio_book_creator/speaker.rb +54 -0
- data/lib/audio_book_creator/speaker_def.rb +39 -0
- data/lib/audio_book_creator/spider.rb +60 -0
- data/lib/audio_book_creator/spoken_chapter.rb +16 -0
- data/lib/audio_book_creator/surfer_def.rb +15 -0
- data/lib/audio_book_creator/url_filter.rb +33 -0
- data/lib/audio_book_creator/version.rb +3 -0
- data/lib/audio_book_creator/web.rb +44 -0
- data/spec/audio_book_creator/binder_spec.rb +103 -0
- data/spec/audio_book_creator/book_creator_spec.rb +63 -0
- data/spec/audio_book_creator/book_def_spec.rb +61 -0
- data/spec/audio_book_creator/cached_hash_spec.rb +19 -0
- data/spec/audio_book_creator/cascading_array_spec.rb +64 -0
- data/spec/audio_book_creator/chapter_spec.rb +80 -0
- data/spec/audio_book_creator/cli_spec.rb +274 -0
- data/spec/audio_book_creator/conductor_spec.rb +102 -0
- data/spec/audio_book_creator/editor_spec.rb +39 -0
- data/spec/audio_book_creator/logging_spec.rb +21 -0
- data/spec/audio_book_creator/page_db_spec.rb +74 -0
- data/spec/audio_book_creator/page_def_spec.rb +79 -0
- data/spec/audio_book_creator/runner_spec.rb +65 -0
- data/spec/audio_book_creator/speaker_def_spec.rb +39 -0
- data/spec/audio_book_creator/speaker_spec.rb +105 -0
- data/spec/audio_book_creator/spider_spec.rb +172 -0
- data/spec/audio_book_creator/spoken_chapter_spec.rb +30 -0
- data/spec/audio_book_creator/surfer_def_spec.rb +17 -0
- data/spec/audio_book_creator/url_filter_spec.rb +52 -0
- data/spec/audio_book_creator/version_spec.rb +5 -0
- data/spec/audio_book_creator/web_spec.rb +66 -0
- data/spec/audio_book_creator_spec.rb +25 -0
- data/spec/spec_helper.rb +106 -0
- data/spec/support/test_logger.rb +21 -0
- metadata +238 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
module AudioBookCreator
  # One chapter of a book: a sequence number, a title and the body text.
  class Chapter
    attr_accessor :number, :title, :body

    # @param options [Hash] attribute names mapped to values; every pair is
    #   assigned through the matching public writer (number=, title=, body=).
    #   Afterwards the body is coerced with Array(), nil entries are dropped
    #   and the remaining pieces are joined with a blank line between them.
    def initialize(options = {})
      options.each { |name, value| public_send("#{name}=", value) }
      @body = Array(@body).compact.join("\n\n")
    end

    # @return [String] base file name (no extension), number padded to two digits
    def filename
      format("chapter%02d", number)
    end

    # @return [Boolean] true when the joined body is the empty string
    def empty?
      body.empty?
    end

    # @return [Boolean] the opposite of #empty?
    def present?
      !empty?
    end

    # @return [String] title, blank line, body, trailing newline
    def to_s
      "#{title}\n\n#{body}\n"
    end

    # Chapters are equal when the other object is a Chapter with the same
    # number (compared with ==) and the same title and body (compared with eql?).
    def ==(other)
      other.kind_of?(Chapter) &&
        other.number == number &&
        other.title.eql?(title) && other.body.eql?(body)
    end
    alias :eql? :==

    # Keeps Hash/uniq lookups in line with #eql?.
    # The number is deliberately left out: #eql? compares it with ==, so
    # 1 and 1.0 count as equal there although their own hashes differ.
    # @return [Integer]
    def hash
      [Chapter, title, body].hash
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'logger'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
module AudioBookCreator
  # Command line front end. Parses the arguments into four parameter
  # objects (page, book, speaker and surfer definitions) and hands them
  # to a Conductor, which does the actual work.
  class Cli
    include Logging

    # Starts in non-verbose mode and seeds the page definition with the
    # css selectors "h1" (title), "p" (body) and "a" (links).
    def initialize
      self.verbose = false
      page_def.title_path = "h1"
      page_def.body_path = "p"
      page_def.link_path = "a"
    end

    # stub for testing
    attr_writer :web

    # Interprets the positional arguments left over after option parsing.
    #
    # * no arguments: prints a hint plus the usage text and exits with status 2
    # * first argument contains "://": every argument is a url and the title
    #   is the text after the last "/" of the first url
    # * otherwise: the first argument is removed from argv and used as the
    #   title, the remaining arguments are the urls
    #
    # In both non-exit cases the surfer definition then receives the cache
    # file name and the first url as its host.
    #
    # @param argv [Array<String>] positional arguments (may be mutated)
    # @param usage [String] usage text printed when argv is empty
    def set_args(argv, usage)
      if argv.empty?
        puts "please provide a url", usage
        exit 2
      elsif argv.first.include?("://")
        book_def.title = argv.first.split("/").last
        book_def.urls = argv
      else
        book_def.title = argv.shift
        book_def.urls = argv
      end
      surfer_def.cache_filename = database
      surfer_def.host = book_def.urls.first
    end

    # @return [String] file name of the page cache
    def database
      "pages.db"
    end

    # @param val [Boolean] truthy logs at INFO level, falsy at WARN level
    def verbose=(val)
      logger.level = val ? Logger::INFO : Logger::WARN
    end

    # Parses options out of argv (destructively) and passes what remains to
    # #set_args together with the generated usage text.
    #
    # @param argv [Array<String>] raw command line arguments
    # @return [Cli] self
    def parse(argv)
      options = OptionParser.new do |opts|
        opts.program_name = "audio_book_creator"
        opts.version = VERSION
        opts.banner = "Usage: audio_book_creator [options] title url [url] [...]"
        opt(opts, self) do |o|
          o.opt(:verbose, "-v", "--verbose", "Run verbosely")
        end
        opt(opts, page_def) do |o|
          o.opt(:title_path, "--title STRING", "Title css (e.g.: h1)")
          o.opt(:body_path, "--body STRING", "Content css (e.g.: p)")
          o.opt(:link_path, "--link STRING", "Next Page css (e.g.: a.Next)")
          o.opt(:chapter_path, "--chapter STRING", "Next Chapter css")
        end
        opt(opts, surfer_def) do |o|
          # NOTE(review): relies on OptionParser handing a falsy value to the
          # block for the "--no-" form - confirm against the optparse docs.
          o.opt(:max, "--no-max", "Don't limit the number of pages to visit")
          o.opt(:max, "--max NUMBER", Integer, "Maximum number of pages to visit (default: 10)")
          o.opt(:regen_html, "--force-html", "Regenerate the html")
        end
        opt(opts, speaker_def) do |o|
          o.opt(:regen_audio, "--force-audio", "Regenerate the audio")
          o.opt(:rate, "--rate NUMBER", Integer, "Set words per minute")
          o.opt(:voice, "--voice STRING", "Set speaker voice")
        end
        opt(opts, book_def) do |o|
          o.opt(:base_dir, "--base-dir STRING", "Directory to hold files")
          o.opt(:itunes, "--itunes", "-A", "Load book into itunes")
        end
      end
      options.parse!(argv)
      set_args(argv, options.to_s)
      self
    end

    # parameter objects (each one is created on first access)

    def page_def
      @page_def ||= PageDef.new
    end

    def book_def
      @book_def ||= BookDef.new
    end

    def speaker_def
      @speaker_def ||= SpeakerDef.new
    end

    def surfer_def
      @surfer_def ||= SurferDef.new
    end

    def conductor
      @conductor ||= Conductor.new(page_def, book_def, speaker_def, surfer_def)
    end

    # Delegates to Conductor#run.
    def run
      conductor.run
    end

    private

    # Yields an OptSetter that binds option definitions on +opts+ to
    # attribute writers on +model+.
    def opt(opts, model)
      yield OptSetter.new(opts, model)
    end

    # Registers OptionParser switches whose handler assigns the parsed value
    # to an attribute of the wrapped model.
    class OptSetter
      def initialize(opts, model)
        @opts = opts
        @model = model
      end

      # @param value [Symbol] attribute name; "<value>=" is called on the model
      # @param args [Array] switch definition, passed unchanged to OptionParser#on
      def opt(value, *args)
        # every writer used here is public, so public_send is sufficient
        @opts.on(*args) { |v| @model.public_send("#{value}=", v) }
      end
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module AudioBookCreator
  # Builds the collaborating components out of the four definition objects.
  # Each component is constructed the first time it is asked for and the
  # same instance is returned afterwards.
  class Conductor
    attr_accessor :page_def, :book_def, :speaker_def, :surfer_def

    def initialize(page_def, book_def, speaker_def, surfer_def)
      @page_def, @book_def = page_def, book_def
      @speaker_def, @surfer_def = speaker_def, surfer_def
    end

    # --- components used by the spider ---

    # Page store kept in the surfer definition's cache file.
    def page_cache
      return @page_cache if @page_cache
      @page_cache = PageDb.new(surfer_def.cache_filename)
    end

    # Web access object, constructed with the surfer definition's max.
    def web
      return @web if @web
      @web = Web.new(surfer_def.max)
    end

    # Combination of the page cache and the web object.
    def cached_web
      return @cached_hash if @cached_hash
      @cached_hash = CachedHash.new(page_cache, web)
    end

    # Url filter constructed from the surfer definition's host.
    def invalid_urls
      return @invalid_urls if @invalid_urls
      @invalid_urls = UrlFilter.new(surfer_def.host)
    end

    def spider
      return @spider if @spider
      @spider = Spider.new(page_def, cached_web, invalid_urls)
    end

    # --- components that turn pages into a book ---

    def editor
      return @editor if @editor
      @editor = Editor.new(page_def)
    end

    def speaker
      return @speaker if @speaker
      @speaker = Speaker.new(speaker_def, book_def)
    end

    def binder
      return @binder if @binder
      @binder = Binder.new(book_def, speaker_def)
    end

    # --- top level ---

    def creator
      return @creator if @creator
      @creator = BookCreator.new(spider, editor, speaker, binder)
    end

    # Urls handed to the creator, taken from the book definition's unique_urls.
    def outstanding
      return @outstanding if @outstanding
      @outstanding = book_def.unique_urls
    end

    # Passes the outstanding urls to the creator.
    def run
      creator.create(outstanding)
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
module AudioBookCreator
  # Turns raw html pages into Chapter objects using the selectors of a page definition.
  class Editor
    attr_accessor :page_def

    def initialize(page_def)
      @page_def = page_def
    end

    # Converts page[] -> chapter[].
    #
    # Chapters are numbered from 1 in the order the pages are given. A page
    # for which the page definition finds no title is named "Chapter <number>".
    #
    # @param pages [Enumerable] html of each page
    # @return [Array<Chapter>]
    def parse(pages)
      pages.map.with_index(1) do |html, chapter_number|
        document = Nokogiri::HTML(html)
        heading = page_def.title(document)
        heading ||= "Chapter #{chapter_number}"
        content = page_def.body(document)
        Chapter.new(number: chapter_number, title: heading, body: content)
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require "sqlite3"
|
2
|
+
|
3
|
+
module AudioBookCreator
  # Hash-like page store kept in a SQLite file. Pages live in a single
  # "pages" table with a "name" column (the key) and a "contents" column.
  class PageDb
    include Enumerable

    # this is for tests - get out of here
    attr_accessor :filename

    # @param filename [String] path handed to SQLite3::Database.new; the
    #   database is not opened until the first query
    def initialize(filename)
      @filename = filename
    end

    # Stores +value+ under +key+.
    #
    # An existing row is updated in place; a row is inserted only when no
    # row carries that name yet. A plain insert would leave the old row
    # behind and #[] would keep returning the stale contents.
    def []=(key, value)
      db.execute "update pages set contents = ? where name = ?", [value, key]
      # changes reports how many rows the update touched
      if db.changes.zero?
        db.execute "insert into pages (name, contents) values ( ?, ?)", [key, value]
      end
    end

    # @return [Object, nil] contents of the first row stored under +key+,
    #   nil when there is none
    def [](key)
      row = db.execute("select contents from pages where name = ?", [key]).first
      row.first if row
    end

    # @return [Boolean] true when a truthy value is stored under +key+
    def include?(key)
      !!self[key]
    end

    # Passes every [name, contents] row to the block, ordered by rowid.
    def each(&block)
      db.execute "select name, contents from pages order by rowid", &block
    end

    private

    # Database handle, opened (and the table created) on first use.
    def db
      @db ||= create
    end

    # Opens the database file and makes sure the pages table exists.
    def create
      SQLite3::Database.new(filename).tap do |db|
        db.execute("create table if not exists pages (name text, contents blob)")
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module AudioBookCreator
  # information on the format of the html page that is read
  #
  # Holds the css selectors for the title, the body, the links to further
  # pages and the links to further chapters.
  class PageDef
    attr_accessor :title_path, :body_path, :link_path, :chapter_path

    # @param title_path [String] css selector of the title
    # @param body_path [String] css selector of the body content
    # @param link_path [String] css selector of page links
    # @param chapter_path [String, nil] css selector of chapter links
    def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil)
      @title_path = title_path
      @body_path = body_path
      @link_path = link_path
      @chapter_path = chapter_path
    end

    # @param dom [#css] parsed document
    # @return [String, nil] text of the first node matching title_path,
    #   nil when nothing matches
    def title(dom)
      title = dom.css(title_path).first
      title.text if title
    end

    # @param dom [#css] parsed document
    # @return whatever dom.css returns for body_path (nodes, not text)
    def body(dom)
      dom.css(body_path)
      # feels like I need .map { |n| n.text }
    end

    # Maps the block over every node matching link_path.
    # @return [Array] empty when link_path is nil
    def page_links(dom, &block)
      links(dom, link_path, &block)
    end

    # Maps the block over every node matching chapter_path.
    # @return [Array] empty when chapter_path is nil, which is its default
    def chapter_links(dom, &block)
      links(dom, chapter_path, &block)
    end

    private

    # Shared lookup for both link kinds; a nil selector means "no links"
    # instead of being passed on to dom.css.
    def links(dom, path, &block)
      return [] unless path
      dom.css(path).map(&block)
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# migrate to awesome spawn
|
2
|
+
module AudioBookCreator
  # Runs external commands through Kernel#system and logs what happened.
  class Runner
    include Logging

    # Runs +cmd+ with the given parameters.
    #
    # @param cmd [String] program to run
    # @param options [Hash] only :params is read. It may be an array or a
    #   hash (flag => value); nesting is flattened and nil entries are
    #   dropped, so a flag whose value is nil is passed on its own.
    #   A missing :params means "no parameters".
    # @return [true, false, nil] the result of Kernel#system
    def run(cmd, options = {})
      # Array() turns nil into [] and a hash into [flag, value] pairs
      params = Array(options[:params]).flatten.compact

      logger.info { "run: #{cmd} #{params.join(" ")}" }
      logger.info ""
      status = system(cmd, *params.map(&:to_s))
      logger.info ""
      logger.info { status ? "success" : "issue" }

      status
    end

    # Same as #run, but a falsy result is turned into an exception.
    #
    # @raise [RuntimeError] when #run returns false or nil
    def run!(cmd, options = {})
      run(cmd, options) || raise("trouble running command")
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module AudioBookCreator
  # Produces one text file and one sound file per chapter inside the
  # book's base directory, using the "say" command.
  class Speaker
    attr_accessor :speaker_def
    attr_accessor :book_def

    def initialize(speaker_def, book_def)
      @speaker_def = speaker_def
      @book_def = book_def
    end

    # Creates the base directory when nothing exists at that path yet.
    # Missing parent directories are created as well, so a nested base
    # directory (e.g. "books/title") works.
    def make_directory_structure
      FileUtils.mkdir_p(base_dir) unless File.exist?(base_dir)
    end

    # Writes the chapter text and has it spoken into a sound file.
    #
    # Both steps go through AudioBookCreator.optionally_write /
    # optionally_run together with the regen_audio flag.
    # NOTE(review): those helpers are defined elsewhere; presumably they skip
    # work for files that already exist unless forced - confirm.
    #
    # @param chapter [Chapter]
    # @return [SpokenChapter] built from the chapter title and the sound file name
    # @raise [RuntimeError] when the chapter is empty
    def say(chapter)
      raise "Empty chapter" if chapter.empty?
      text_filename = chapter_text_filename(chapter)
      sound_filename = chapter_sound_filename(chapter)

      AudioBookCreator.optionally_write(text_filename, force) { chapter.to_s }
      AudioBookCreator.optionally_run(sound_filename, force) do
        ["say", params: params(text_filename, sound_filename)]
      end
      SpokenChapter.new(chapter.title, sound_filename)
    end

    # @return [String] path of the chapter's text file inside the base directory
    def chapter_text_filename(chapter)
      "#{base_dir}/#{chapter.filename}.txt"
    end

    # @return [String] path of the chapter's sound file inside the base directory
    def chapter_sound_filename(chapter)
      "#{base_dir}/#{chapter.filename}.m4a"
    end

    private

    def base_dir
      book_def.base_dir
    end

    # The speaker definition's regen_audio flag.
    def force
      speaker_def.regen_audio
    end

    # Command line flags for "say": voice, rate, input file and output file.
    def params(text_filename, sound_filename)
      {
        "-v" => speaker_def.voice,
        "-r" => speaker_def.rate,
        "-f" => text_filename,
        "-o" => sound_filename,
      }
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module AudioBookCreator
  # Settings for speaking the chapters and for binding the book.
  class SpeakerDef
    # currently like the following voices:
    # Vicki             # 10
    # Serena            # 8 UK
    # Allison           # ? (ok)
    # Moira             # 7 Irish
    # Fiona             # 5 Scottish
    # Kate              # 4 UK
    # Susan             # 2
    # Zosia             # 0 Poland
    # Angelica          # 0 Mexican?
    # Paulina           # 0 Mexican
    attr_accessor :voice, :rate

    # max_hours: split on this hour mark
    attr_accessor :channels, :max_hours, :bit_rate, :sample_rate, :regen_audio

    # @param options [Hash] attribute names mapped to values, assigned through
    #   the public writers. Any attribute with a fallback that is still
    #   nil or false afterwards receives that fallback.
    def initialize(options = {})
      options.each { |attribute, setting| public_send("#{attribute}=", setting) }

      fallback_settings.each do |attribute, fallback|
        ivar = "@#{attribute}"
        instance_variable_set(ivar, fallback) unless instance_variable_get(ivar)
      end
    end

    private

    # Fallback values, built fresh on every call.
    def fallback_settings
      {
        # for speaking the chapter
        voice: "Vicki",
        rate: 280,
        # for binding the book
        channels: 1,
        bit_rate: 32,
        max_hours: 7,
        sample_rate: 22_050,
      }
    end
  end
end
|