audio_book_creator 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.rspec +4 -0
  4. data/.travis.yml +8 -0
  5. data/Gemfile +8 -0
  6. data/README.md +60 -0
  7. data/Rakefile +8 -0
  8. data/audio_book_creator.gemspec +31 -0
  9. data/bin/audio_book_creator +6 -0
  10. data/lib/audio_book_creator.rb +59 -0
  11. data/lib/audio_book_creator/binder.rb +61 -0
  12. data/lib/audio_book_creator/book_creator.rb +31 -0
  13. data/lib/audio_book_creator/book_def.rb +36 -0
  14. data/lib/audio_book_creator/cached_hash.rb +20 -0
  15. data/lib/audio_book_creator/cascading_array.rb +57 -0
  16. data/lib/audio_book_creator/chapter.rb +33 -0
  17. data/lib/audio_book_creator/cli.rb +119 -0
  18. data/lib/audio_book_creator/conductor.rb +67 -0
  19. data/lib/audio_book_creator/editor.rb +20 -0
  20. data/lib/audio_book_creator/logging.rb +7 -0
  21. data/lib/audio_book_creator/page_db.rb +42 -0
  22. data/lib/audio_book_creator/page_def.rb +31 -0
  23. data/lib/audio_book_creator/runner.rb +22 -0
  24. data/lib/audio_book_creator/speaker.rb +54 -0
  25. data/lib/audio_book_creator/speaker_def.rb +39 -0
  26. data/lib/audio_book_creator/spider.rb +60 -0
  27. data/lib/audio_book_creator/spoken_chapter.rb +16 -0
  28. data/lib/audio_book_creator/surfer_def.rb +15 -0
  29. data/lib/audio_book_creator/url_filter.rb +33 -0
  30. data/lib/audio_book_creator/version.rb +3 -0
  31. data/lib/audio_book_creator/web.rb +44 -0
  32. data/spec/audio_book_creator/binder_spec.rb +103 -0
  33. data/spec/audio_book_creator/book_creator_spec.rb +63 -0
  34. data/spec/audio_book_creator/book_def_spec.rb +61 -0
  35. data/spec/audio_book_creator/cached_hash_spec.rb +19 -0
  36. data/spec/audio_book_creator/cascading_array_spec.rb +64 -0
  37. data/spec/audio_book_creator/chapter_spec.rb +80 -0
  38. data/spec/audio_book_creator/cli_spec.rb +274 -0
  39. data/spec/audio_book_creator/conductor_spec.rb +102 -0
  40. data/spec/audio_book_creator/editor_spec.rb +39 -0
  41. data/spec/audio_book_creator/logging_spec.rb +21 -0
  42. data/spec/audio_book_creator/page_db_spec.rb +74 -0
  43. data/spec/audio_book_creator/page_def_spec.rb +79 -0
  44. data/spec/audio_book_creator/runner_spec.rb +65 -0
  45. data/spec/audio_book_creator/speaker_def_spec.rb +39 -0
  46. data/spec/audio_book_creator/speaker_spec.rb +105 -0
  47. data/spec/audio_book_creator/spider_spec.rb +172 -0
  48. data/spec/audio_book_creator/spoken_chapter_spec.rb +30 -0
  49. data/spec/audio_book_creator/surfer_def_spec.rb +17 -0
  50. data/spec/audio_book_creator/url_filter_spec.rb +52 -0
  51. data/spec/audio_book_creator/version_spec.rb +5 -0
  52. data/spec/audio_book_creator/web_spec.rb +66 -0
  53. data/spec/audio_book_creator_spec.rb +25 -0
  54. data/spec/spec_helper.rb +106 -0
  55. data/spec/support/test_logger.rb +21 -0
  56. metadata +238 -0
@@ -0,0 +1,33 @@
1
module AudioBookCreator
  # One chapter of a book: its position, its title and its text.
  class Chapter
    attr_accessor :number, :title, :body

    # Builds a chapter from a hash of attributes.
    #
    # Every key is assigned through the matching public writer, so an
    # unknown key raises NoMethodError. Whatever was given as :body
    # (nil, a single value or a list) is then collapsed into one String:
    # nil entries are dropped and the rest joined by a blank line.
    #
    # @param options [Hash] any of :number, :title, :body
    def initialize(options = {})
      options.each { |name, value| public_send("#{name}=", value) }
      @body = Array(@body).compact.join("\n\n")
    end

    # @return [String] base file name (no extension), e.g. "chapter03"
    def filename
      format("chapter%02d", number)
    end

    # @return [Boolean] true when the chapter has no body text
    def empty?
      body.empty?
    end

    # @return [Boolean] opposite of #empty?
    def present?
      !empty?
    end

    # @return [String] title, a blank line, the body and a trailing newline
    def to_s
      "#{title}\n\n#{body}\n"
    end

    # Two chapters are equal when number, title and body all match.
    #
    # @param other [Object]
    # @return [Boolean]
    def ==(other)
      other.kind_of?(Chapter) &&
        other.number == number &&
        other.title.eql?(title) && other.body.eql?(body)
    end
    alias :eql? :==

    # Hash code consistent with #eql?, so equal chapters collapse in
    # Array#uniq and work as Hash/Set keys.
    #
    # number is deliberately left out: #== compares it with numeric ==
    # (1 == 1.0), which a hash code cannot mirror, and omitting it keeps
    # the "eql? implies same hash" rule intact.
    #
    # @return [Integer]
    def hash
      [Chapter, title, body].hash
    end
  end
end
@@ -0,0 +1,119 @@
1
+ require 'optparse'
2
+ require 'fileutils'
3
+ require 'logger'
4
+ require 'uri'
5
+
6
module AudioBookCreator
  # Command line front end: turns ARGV into the parameter objects
  # (page, book, speaker and surfer definitions) and hands them to a
  # Conductor.
  class Cli
    include Logging

    # Starts quiet (logger at WARN) and seeds the page definition with
    # the css selectors "h1", "p" and "a".
    def initialize
      self.verbose = false
      page_def.title_path = "h1"
      page_def.body_path = "p"
      page_def.link_path = "a"
    end

    # stub for testing
    attr_writer :web

    # Interprets the positional arguments left over after option parsing.
    #
    # * no arguments: prints a message plus the usage text and exits
    #   with status 2
    # * first argument contains "://": every argument is a url and the
    #   title is the last "/"-separated segment of the first url
    # * otherwise: the first argument is the title (it is shifted off
    #   argv, so argv is modified) and the rest are urls
    #
    # Afterwards the surfer definition receives the cache file name and
    # the first url as its host.
    #
    # @param argv [Array<String>] positional arguments
    # @param usage [String] usage text shown when argv is empty
    def set_args(argv, usage)
      if argv.empty?
        puts "please url", usage
        exit 2
      elsif argv.first.include?("://")
        book_def.title = argv.first.split("/").last
        book_def.urls = argv
      else
        book_def.title = argv.shift
        book_def.urls = argv
      end
      surfer_def.cache_filename = database
      surfer_def.host = book_def.urls.first
    end

    # @return [String] file name of the page cache
    def database
      "pages.db"
    end

    # Maps the verbose flag onto the shared logger level.
    #
    # @param val [Boolean] truthy for INFO, falsy for WARN
    def verbose=(val)
      logger.level = val ? Logger::INFO : Logger::WARN
    end

    # Parses options out of argv (removing them from it) and then
    # interprets what is left via #set_args.
    #
    # @param argv [Array<String>] raw command line arguments
    # @return [Cli] self, so calls can be chained
    def parse(argv)
      options = OptionParser.new do |opts|
        opts.program_name = "audio_book_creator"
        opts.version = VERSION
        opts.banner = "Usage: audio_book_creator [options] title url [url] [...]"
        opt(opts, self) do |o|
          o.opt(:verbose, "-v", "--verbose", "Run verbosely")
        end
        opt(opts, page_def) do |o|
          o.opt(:title_path, "--title STRING", "Title css (e.g.: h1)")
          o.opt(:body_path, "--body STRING", "Content css (e.g.: p)")
          o.opt(:link_path, "--link STRING", "Next Page css (e.g.: a.Next)")
          o.opt(:chapter_path, "--chapter STRING", "Next Chapter css")
        end
        opt(opts, surfer_def) do |o|
          o.opt(:max, "--no-max", "Don't limit the number of pages to visit")
          o.opt(:max, "--max NUMBER", Integer, "Maximum number of pages to visit (default: 10)")
          # help text used to be a copy of the --force-audio one
          o.opt(:regen_html, "--force-html", "Regenerate the html")
        end
        opt(opts, speaker_def) do |o|
          o.opt(:regen_audio, "--force-audio", "Regenerate the audio")
          o.opt(:rate, "--rate NUMBER", Integer, "Set words per minute")
          o.opt(:voice, "--voice STRING", "Set speaker voice")
        end
        opt(opts, book_def) do |o|
          o.opt(:base_dir, "--base-dir STRING", "Directory to hold files")
          o.opt(:itunes, "--itunes", "-A", "Load book into itunes")
        end
      end
      options.parse!(argv)
      set_args(argv, options.to_s)
      self
    end

    # parameter objects (each created on first use and then reused)

    def page_def
      @page_def ||= PageDef.new
    end

    def book_def
      @book_def ||= BookDef.new
    end

    def speaker_def
      @speaker_def ||= SpeakerDef.new
    end

    def surfer_def
      @surfer_def ||= SurferDef.new
    end

    # @return [Conductor] built once from the four parameter objects
    def conductor
      @conductor ||= Conductor.new(page_def, book_def, speaker_def, surfer_def)
    end

    # Runs the conductor.
    def run
      conductor.run
    end

    private

    # Yields an OptSetter that registers switches on opts and writes
    # their values to model.
    def opt(opts, model)
      yield OptSetter.new(opts, model)
    end

    # Binds OptionParser switches to attribute writers of one model.
    class OptSetter
      def initialize(opts, model)
        @opts = opts
        @model = model
      end

      # Registers a switch whose parsed value is assigned to the
      # model's "<value>=" writer.
      #
      # @param value [Symbol] attribute name on the model
      # @param args arguments forwarded unchanged to OptionParser#on
      def opt(value, *args)
        @opts.on(*args) { |v| @model.send("#{value}=", v) }
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
module AudioBookCreator
  # Assembles the working components from the four parameter objects and
  # kicks off book creation. Every component is built on first access
  # and then reused.
  class Conductor
    attr_accessor :page_def, :book_def, :speaker_def, :surfer_def

    def initialize(page_def, book_def, speaker_def, surfer_def)
      @page_def, @book_def = page_def, book_def
      @speaker_def, @surfer_def = speaker_def, surfer_def
    end

    # components

    ## spider

    # Page store opened on the surfer definition's cache file name.
    def page_cache
      @page_cache ||= PageDb.new(surfer_def.cache_filename)
    end

    # Web accessor built with the surfer definition's page maximum.
    def web
      @web ||= Web.new(surfer_def.max)
    end

    # Combination of the page cache and the web accessor.
    def cached_web
      @cached_hash ||= CachedHash.new(page_cache, web)
    end

    # Url filter built from the surfer definition's host.
    def invalid_urls
      @invalid_urls ||= UrlFilter.new(surfer_def.host)
    end

    # Spider built from the page definition, cached web and url filter.
    def spider
      @spider ||= Spider.new(page_def, cached_web, invalid_urls)
    end

    ##

    # Editor built from the page definition.
    def editor
      @editor ||= Editor.new(page_def)
    end

    # Speaker built from the speaker and book definitions.
    def speaker
      @speaker ||= Speaker.new(speaker_def, book_def)
    end

    # Binder built from the book and speaker definitions.
    def binder
      @binder ||= Binder.new(book_def, speaker_def)
    end

    ##

    # Book creator wired with spider, editor, speaker and binder.
    def creator
      @creator ||= BookCreator.new(spider, editor, speaker, binder)
    end

    # The book definition's unique urls, fetched once.
    def outstanding
      @outstanding ||= book_def.unique_urls
    end

    # Creates the book from the outstanding urls.
    def run
      creator.create(outstanding)
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'nokogiri'
2
module AudioBookCreator
  # Turns fetched pages into Chapter objects using a page definition.
  class Editor
    attr_accessor :page_def

    def initialize(page_def)
      @page_def = page_def
    end

    # convert page[] -> chapter[]
    #
    # Each page is parsed with Nokogiri::HTML and numbered from 1 in the
    # order given. A page for which the page definition finds no title
    # is titled "Chapter <number>".
    #
    # @param pages enumerable of page contents
    # @return [Array<Chapter>]
    def parse(pages)
      pages.map.with_index(1) do |page, number|
        dom = Nokogiri::HTML(page)
        heading = page_def.title(dom) || "Chapter #{number}"
        Chapter.new(number: number, title: heading, body: page_def.body(dom))
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module AudioBookCreator
  # Mixin that gives including classes a #logger method.
  module Logging
    # @return whatever AudioBookCreator.logger returns
    def logger
      ::AudioBookCreator.logger
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require "sqlite3"
2
+
3
module AudioBookCreator
  # Hash-like store of page contents kept in a sqlite "pages" table
  # (columns: name, contents). The database is opened, and the table
  # created if missing, on first use.
  class PageDb
    include Enumerable

    # this is for tests - get out of here
    attr_accessor :filename

    def initialize(filename)
      @filename = filename
    end

    # Inserts a new row for key; existing rows are not replaced.
    def []=(key, value)
      db.execute("insert into pages (name, contents) values ( ?, ?)", [key, value])
    end

    # @return the contents of the first row stored under key, or nil
    def [](key)
      first_row = db.execute("select contents from pages where name = ?", key).first
      first_row.first if first_row
    end

    # @return [Boolean] whether a truthy value is stored under key
    def include?(key)
      self[key] ? true : false
    end

    # Passes the block to the query over all rows (name, contents) in
    # rowid order.
    def each(&block)
      db.execute("select name, contents from pages order by rowid", &block)
    end

    private

    def db
      @db ||= create
    end

    # Opens the database file and makes sure the pages table exists.
    def create
      connection = SQLite3::Database.new(filename)
      connection.execute("create table if not exists pages (name text, contents blob)")
      connection
    end
  end
end
@@ -0,0 +1,31 @@
1
module AudioBookCreator
  # information on the format of the html page that is read
  #
  # Holds the css selectors used to pull the title, the body and the
  # follow-up links out of a parsed page.
  class PageDef
    attr_accessor :title_path, :body_path, :link_path, :chapter_path

    # @param title_path [String] css selector for the title
    # @param body_path [String] css selector for the content
    # @param link_path [String] css selector for page links
    # @param chapter_path [String, nil] css selector for chapter links;
    #   nil means no chapter links are looked up
    def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil)
      @title_path = title_path
      @body_path = body_path
      @link_path = link_path
      @chapter_path = chapter_path
    end

    # @param dom parsed page responding to #css
    # @return text of the first node matching title_path, or nil when
    #   nothing matches
    def title(dom)
      title = dom.css(title_path).first
      title.text if title
    end

    # @param dom parsed page responding to #css
    # @return whatever dom.css yields for body_path (the nodes
    #   themselves, not their text)
    def body(dom)
      dom.css(body_path)
      # feels like I need .map { |n| n.text }
    end

    # Maps the nodes matching link_path through the given block.
    #
    # @param dom parsed page responding to #css
    def page_links(dom, &block)
      dom.css(link_path).map(&block)
    end

    # Maps the nodes matching chapter_path through the given block.
    #
    # chapter_path is nil unless explicitly configured; in that case no
    # selector lookup is attempted and an empty list is returned.
    #
    # @param dom parsed page responding to #css
    def chapter_links(dom, &block)
      return [] unless chapter_path

      dom.css(chapter_path).map(&block)
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # migrate to awesome spawn
2
module AudioBookCreator
  # Runs external commands through Kernel#system and logs the outcome.
  class Runner
    include Logging

    # Runs cmd with the arguments found under options[:params].
    #
    # The params are flattened twice and stripped of nils before each
    # remaining entry is turned into a String argument.
    #
    # @param cmd [String] program to run
    # @param options [Hash] must contain :params
    # @return the result of Kernel#system (true, false or nil)
    def run(cmd, options)
      args = options[:params].flatten.flatten.compact

      logger.info { "run: #{cmd} #{args.join(" ")}" }
      logger.info ""
      succeeded = system(cmd, *args.map(&:to_s))
      logger.info ""
      logger.info { succeeded ? "success" : "issue" }

      succeeded
    end

    # Same as #run, but raises when the command did not succeed.
    #
    # @raise [RuntimeError] when #run returns a falsy value
    def run!(cmd, options)
      outcome = run(cmd, options)
      raise("trouble running command") unless outcome

      outcome
    end
  end
end
@@ -0,0 +1,54 @@
1
module AudioBookCreator
  # Writes a chapter's text to disk and turns it into a sound file with
  # the "say" command.
  class Speaker
    attr_accessor :speaker_def
    attr_accessor :book_def

    def initialize(speaker_def, book_def)
      @speaker_def = speaker_def
      @book_def = book_def
    end

    # Creates the book's base directory when nothing exists at that
    # path yet. Missing parent directories are created as well, so a
    # nested base directory such as "out/book" works.
    def make_directory_structure
      FileUtils.mkdir_p(base_dir) unless File.exist?(base_dir)
    end

    # Writes the chapter text and produces its sound file.
    #
    # Both steps go through AudioBookCreator.optionally_write and
    # AudioBookCreator.optionally_run, which receive the target file and
    # the speaker definition's regen_audio flag.
    #
    # @param chapter chapter responding to #empty?, #filename, #title
    #   and #to_s
    # @return [SpokenChapter] the chapter title paired with the sound
    #   file name
    # @raise [RuntimeError] "Empty chapter" when chapter.empty?
    def say(chapter)
      raise "Empty chapter" if chapter.empty?
      text_filename = chapter_text_filename(chapter)
      sound_filename = chapter_sound_filename(chapter)

      AudioBookCreator.optionally_write(text_filename, force) { chapter.to_s }
      AudioBookCreator.optionally_run(sound_filename, force) do
        ["say", params: params(text_filename, sound_filename)]
      end
      SpokenChapter.new(chapter.title, sound_filename)
    end

    # @return [String] path of the chapter's text file in the base dir
    def chapter_text_filename(chapter)
      "#{base_dir}/#{chapter.filename}.txt"
    end

    # @return [String] path of the chapter's sound file in the base dir
    def chapter_sound_filename(chapter)
      "#{base_dir}/#{chapter.filename}.m4a"
    end

    private

    def base_dir
      book_def.base_dir
    end

    def force
      speaker_def.regen_audio
    end

    # Switches handed to "say": voice, rate, input text file and output
    # sound file.
    def params(text_filename, sound_filename)
      {
        "-v" => speaker_def.voice,
        "-r" => speaker_def.rate,
        "-f" => text_filename,
        "-o" => sound_filename,
      }
    end
  end
end
@@ -0,0 +1,39 @@
1
module AudioBookCreator
  # Settings for speaking the chapters and for binding the audio.
  class SpeakerDef
    # currently like the following voices:
    # Vicki # 10
    # Serena # 8 UK
    # Allison # ? (ok)
    # Moira # 7 Irish
    # Fiona # 5 Scottish
    # Kate # 4 UK
    # Susan # 2
    # Zosia # 0 Poland
    # Angelica # 0 Mexican?
    # Paulina # 0 Mexican
    attr_accessor :voice, :rate

    # max_hours: split on this hour mark
    attr_accessor :channels, :max_hours, :bit_rate, :sample_rate, :regen_audio

    # Assigns every given option through its public writer (an unknown
    # key raises NoMethodError), then fills in a fallback for each
    # setting that is still unset. regen_audio has no fallback.
    #
    # @param options [Hash] attribute name => value
    def initialize(options = {})
      options.each { |name, value| public_send("#{name}=", value) }

      fallbacks = {
        # for speaking the chapter
        "@voice" => "Vicki",
        "@rate" => 280,
        # for binding the book
        "@channels" => 1,
        "@bit_rate" => 32,
        "@max_hours" => 7,
        "@sample_rate" => 22_050,
      }
      fallbacks.each do |ivar, fallback|
        instance_variable_set(ivar, fallback) unless instance_variable_get(ivar)
      end
    end
  end
end