audio_book_creator 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.rspec +4 -0
  4. data/.travis.yml +8 -0
  5. data/Gemfile +8 -0
  6. data/README.md +60 -0
  7. data/Rakefile +8 -0
  8. data/audio_book_creator.gemspec +31 -0
  9. data/bin/audio_book_creator +6 -0
  10. data/lib/audio_book_creator.rb +59 -0
  11. data/lib/audio_book_creator/binder.rb +61 -0
  12. data/lib/audio_book_creator/book_creator.rb +31 -0
  13. data/lib/audio_book_creator/book_def.rb +36 -0
  14. data/lib/audio_book_creator/cached_hash.rb +20 -0
  15. data/lib/audio_book_creator/cascading_array.rb +57 -0
  16. data/lib/audio_book_creator/chapter.rb +33 -0
  17. data/lib/audio_book_creator/cli.rb +119 -0
  18. data/lib/audio_book_creator/conductor.rb +67 -0
  19. data/lib/audio_book_creator/editor.rb +20 -0
  20. data/lib/audio_book_creator/logging.rb +7 -0
  21. data/lib/audio_book_creator/page_db.rb +42 -0
  22. data/lib/audio_book_creator/page_def.rb +31 -0
  23. data/lib/audio_book_creator/runner.rb +22 -0
  24. data/lib/audio_book_creator/speaker.rb +54 -0
  25. data/lib/audio_book_creator/speaker_def.rb +39 -0
  26. data/lib/audio_book_creator/spider.rb +60 -0
  27. data/lib/audio_book_creator/spoken_chapter.rb +16 -0
  28. data/lib/audio_book_creator/surfer_def.rb +15 -0
  29. data/lib/audio_book_creator/url_filter.rb +33 -0
  30. data/lib/audio_book_creator/version.rb +3 -0
  31. data/lib/audio_book_creator/web.rb +44 -0
  32. data/spec/audio_book_creator/binder_spec.rb +103 -0
  33. data/spec/audio_book_creator/book_creator_spec.rb +63 -0
  34. data/spec/audio_book_creator/book_def_spec.rb +61 -0
  35. data/spec/audio_book_creator/cached_hash_spec.rb +19 -0
  36. data/spec/audio_book_creator/cascading_array_spec.rb +64 -0
  37. data/spec/audio_book_creator/chapter_spec.rb +80 -0
  38. data/spec/audio_book_creator/cli_spec.rb +274 -0
  39. data/spec/audio_book_creator/conductor_spec.rb +102 -0
  40. data/spec/audio_book_creator/editor_spec.rb +39 -0
  41. data/spec/audio_book_creator/logging_spec.rb +21 -0
  42. data/spec/audio_book_creator/page_db_spec.rb +74 -0
  43. data/spec/audio_book_creator/page_def_spec.rb +79 -0
  44. data/spec/audio_book_creator/runner_spec.rb +65 -0
  45. data/spec/audio_book_creator/speaker_def_spec.rb +39 -0
  46. data/spec/audio_book_creator/speaker_spec.rb +105 -0
  47. data/spec/audio_book_creator/spider_spec.rb +172 -0
  48. data/spec/audio_book_creator/spoken_chapter_spec.rb +30 -0
  49. data/spec/audio_book_creator/surfer_def_spec.rb +17 -0
  50. data/spec/audio_book_creator/url_filter_spec.rb +52 -0
  51. data/spec/audio_book_creator/version_spec.rb +5 -0
  52. data/spec/audio_book_creator/web_spec.rb +66 -0
  53. data/spec/audio_book_creator_spec.rb +25 -0
  54. data/spec/spec_helper.rb +106 -0
  55. data/spec/support/test_logger.rb +21 -0
  56. metadata +238 -0
@@ -0,0 +1,33 @@
1
module AudioBookCreator
  # One chapter of a book: its position, its title and the text to be read.
  class Chapter
    attr_accessor :number, :title, :body

    # Assigns every option through its public writer, then normalizes the body
    # into a single String.
    #
    # @param options [Hash] any of :number, :title, :body
    # @raise [NoMethodError] when a key has no matching public writer
    def initialize(options = {})
      options.each { |name, value| public_send("#{name}=", value) }
      # body may be nil, a single value or a list of paragraphs: nil entries
      # are dropped and the rest is joined with a blank line in between.
      @body = Array(@body).compact.join("\n\n")
    end

    # @return [String] base name (no extension) such as "chapter03"
    def filename
      format("chapter%02d", number)
    end

    # @return [Boolean] true when the chapter has no body text
    def empty?
      body.empty?
    end

    # @return [Boolean] the opposite of #empty?
    def present?
      !empty?
    end

    # @return [String] the title, a blank line, the body and a trailing newline
    def to_s
      "#{title}\n\n#{body}\n"
    end

    # Two chapters are equal when number, title and body all match.
    #
    # @param other [Object]
    # @return [Boolean]
    def ==(other)
      other.kind_of?(Chapter) &&
        other.number == number &&
        other.title.eql?(title) && other.body.eql?(body)
    end
    alias :eql? :==

    # Hash code that agrees with #eql?, so equal chapters behave as the same
    # Hash key and are collapsed by Array#uniq.
    #
    # number is deliberately left out: it is compared with ==, under which
    # 1 and 1.0 are equal although their hash codes differ.
    #
    # @return [Integer]
    def hash
      [Chapter, title, body].hash
    end
  end
end
@@ -0,0 +1,119 @@
1
+ require 'optparse'
2
+ require 'fileutils'
3
+ require 'logger'
4
+ require 'uri'
5
+
6
module AudioBookCreator
  # Command line front end: parses the arguments into the parameter objects
  # (page, book, speaker and surfer definitions) and hands them to a Conductor.
  class Cli
    include Logging

    # Starts non-verbose and with the default css selectors on the page
    # definition.
    def initialize
      self.verbose = false
      page_def.title_path = "h1"
      page_def.body_path = "p"
      page_def.link_path = "a"
    end

    # stub for testing
    attr_writer :web

    # Interprets the positional arguments left after option parsing.
    #
    # When the first argument contains "://" every argument is taken as a url
    # and the title is the last path segment of the first url. Otherwise the
    # first argument is the title (it is shifted off argv) and the remaining
    # arguments are the urls.
    #
    # With no arguments at all the usage text is printed and the process
    # exits with status 2.
    #
    # @param argv [Array<String>] positional arguments
    # @param usage [String] help text shown when argv is empty
    def set_args(argv, usage)
      if argv.empty?
        puts "please provide a url", usage
        exit 2
      elsif argv.first.include?("://")
        book_def.title = argv.first.split("/").last
        book_def.urls = argv
      else
        book_def.title = argv.shift
        book_def.urls = argv
      end
      surfer_def.cache_filename = database
      surfer_def.host = book_def.urls.first
    end

    # @return [String] file name handed to the surfer definition as its cache
    def database
      "pages.db"
    end

    # @param val [Boolean] truthy logs at INFO level, otherwise at WARN level
    def verbose=(val)
      logger.level = val ? Logger::INFO : Logger::WARN
    end

    # Parses options out of argv (destructively) and treats whatever is left
    # as title/urls.
    #
    # @param argv [Array<String>] raw command line arguments
    # @return [Cli] self, so calls can be chained
    def parse(argv)
      options = OptionParser.new do |opts|
        opts.program_name = "audio_book_creator"
        opts.version = VERSION
        opts.banner = "Usage: audio_book_creator [options] title url [url] [...]"
        opt(opts, self) do |o|
          o.opt(:verbose, "-v", "--verbose", "Run verbosely")
        end
        opt(opts, page_def) do |o|
          o.opt(:title_path, "--title STRING", "Title css (e.g.: h1)")
          o.opt(:body_path, "--body STRING", "Content css (e.g.: p)")
          o.opt(:link_path, "--link STRING", "Next Page css (e.g.: a.Next)")
          o.opt(:chapter_path, "--chapter STRING", "Next Chapter css")
        end
        opt(opts, surfer_def) do |o|
          o.opt(:max, "--no-max", "Don't limit the number of pages to visit")
          o.opt(:max, "--max NUMBER", Integer, "Maximum number of pages to visit (default: 10)")
          o.opt(:regen_html, "--force-html", "Regenerate the html")
        end
        opt(opts, speaker_def) do |o|
          o.opt(:regen_audio, "--force-audio", "Regenerate the audio")
          o.opt(:rate, "--rate NUMBER", Integer, "Set words per minute")
          o.opt(:voice, "--voice STRING", "Set speaker voice")
        end
        opt(opts, book_def) do |o|
          o.opt(:base_dir, "--base-dir STRING", "Directory to hold files")
          o.opt(:itunes, "--itunes", "-A", "Load book into itunes")
        end
      end
      options.parse!(argv)
      set_args(argv, options.to_s)
      self
    end

    # parameter objects (each is created on first access and then reused)

    def page_def
      @page_def ||= PageDef.new
    end

    def book_def
      @book_def ||= BookDef.new
    end

    def speaker_def
      @speaker_def ||= SpeakerDef.new
    end

    def surfer_def
      @surfer_def ||= SurferDef.new
    end

    # @return [Conductor] built from the four parameter objects
    def conductor
      @conductor ||= Conductor.new(page_def, book_def, speaker_def, surfer_def)
    end

    # Delegates to the conductor.
    def run
      conductor.run
    end

    private

    # Yields an OptSetter that registers switches on opts and stores their
    # values on model.
    def opt(opts, model)
      yield OptSetter.new(opts, model)
    end

    # Binds option parser switches to writers on a model object.
    class OptSetter
      def initialize(opts, model)
        @opts = opts
        @model = model
      end

      # Registers a switch whose parsed value is assigned via "<value>=" on
      # the model.
      #
      # @param value [Symbol] attribute name on the model
      # @param args switch definition passed straight to OptionParser#on
      def opt(value, *args)
        @opts.on(*args) { |v| @model.send("#{value}=", v) }
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
module AudioBookCreator
  # Assembles the components that turn a list of urls into an audio book.
  # Each component is built the first time it is asked for and reused after.
  class Conductor
    attr_accessor :page_def, :book_def, :speaker_def, :surfer_def

    def initialize(page_def, book_def, speaker_def, surfer_def)
      @page_def, @book_def = page_def, book_def
      @speaker_def, @surfer_def = speaker_def, surfer_def
    end

    # components

    ## spider

    # Page store backed by the surfer definition's cache file.
    def page_cache
      return @page_cache if @page_cache
      @page_cache = PageDb.new(surfer_def.cache_filename)
    end

    # Web client configured with the surfer definition's max.
    def web
      return @web if @web
      @web = Web.new(surfer_def.max)
    end

    # Page cache combined with the web client (memoized in @cached_hash).
    def cached_web
      return @cached_hash if @cached_hash
      @cached_hash = CachedHash.new(page_cache, web)
    end

    # Url filter built from the surfer definition's host.
    def invalid_urls
      return @invalid_urls if @invalid_urls
      @invalid_urls = UrlFilter.new(surfer_def.host)
    end

    def spider
      return @spider if @spider
      @spider = Spider.new(page_def, cached_web, invalid_urls)
    end

    ##

    def editor
      return @editor if @editor
      @editor = Editor.new(page_def)
    end

    def speaker
      return @speaker if @speaker
      @speaker = Speaker.new(speaker_def, book_def)
    end

    def binder
      return @binder if @binder
      @binder = Binder.new(book_def, speaker_def)
    end

    ##

    # Book creator wired with spider, editor, speaker and binder.
    def creator
      return @creator if @creator
      @creator = BookCreator.new(spider, editor, speaker, binder)
    end

    # The book definition's unique urls, fetched once.
    def outstanding
      return @outstanding if @outstanding
      @outstanding = book_def.unique_urls
    end

    # Passes the outstanding urls to the creator and returns its result.
    def run
      creator.create(outstanding)
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'nokogiri'
2
module AudioBookCreator
  # Converts raw html pages into Chapter objects using a page definition.
  class Editor
    attr_accessor :page_def

    def initialize(page_def)
      @page_def = page_def
    end

    # convert page[] -> chapter[]
    #
    # Chapters are numbered from 1 in the order the pages are given.
    def parse(pages)
      pages.map.with_index(1) { |page, number| chapter_for(page, number) }
    end

    private

    # Builds one chapter from one page; a page without a title is named
    # after its chapter number.
    def chapter_for(page, number)
      dom = Nokogiri::HTML(page)
      heading = page_def.title(dom) || "Chapter #{number}"
      Chapter.new(number: number, title: heading, body: page_def.body(dom))
    end
  end
end
@@ -0,0 +1,7 @@
1
module AudioBookCreator
  # Mixin that exposes the library-wide logger as an instance method.
  module Logging
    # @return whatever AudioBookCreator.logger returns
    def logger
      ::AudioBookCreator.logger
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require "sqlite3"
2
+
3
module AudioBookCreator
  # Hash-like store of page contents keyed by name, kept in a sqlite file.
  # The database is opened (and the pages table created) on first use.
  class PageDb
    include Enumerable

    # this is for tests - get out of here
    attr_accessor :filename

    def initialize(filename)
      @filename = filename
    end

    # Adds a row for key; existing rows with the same name are left in place.
    def []=(key, value)
      db.execute("insert into pages (name, contents) values ( ?, ?)", [key, value])
    end

    # @return the contents of the first row stored under key, or nil when
    #   there is none
    def [](key)
      rows = db.execute("select contents from pages where name = ?", key)
      first_row = rows.first
      first_row.first if first_row
    end

    # @return [Boolean] whether a truthy value is stored under key
    def include?(key)
      self[key] ? true : false
    end

    # Hands every [name, contents] row, in rowid order, to the block.
    def each(&block)
      db.execute("select name, contents from pages order by rowid", &block)
    end

    private

    def db
      @db ||= create
    end

    # Opens the database file and makes sure the pages table exists.
    def create
      database = SQLite3::Database.new(filename)
      database.execute("create table if not exists pages (name text, contents blob)")
      database
    end
  end
end
@@ -0,0 +1,31 @@
1
module AudioBookCreator
  # information on the format of the html page that is read
  #
  # Each *_path is a css selector that is handed to dom.css.
  class PageDef
    attr_accessor :title_path, :body_path, :link_path, :chapter_path

    # @param title_path [String] selector for the page title
    # @param body_path [String] selector for the content nodes
    # @param link_path [String] selector for links to follow
    # @param chapter_path [String, nil] selector for chapter links; nil means
    #   none is configured
    def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil)
      @title_path = title_path
      @body_path = body_path
      @link_path = link_path
      @chapter_path = chapter_path
    end

    # @param dom [#css] parsed page
    # @return the text of the first node matching title_path, or nil when
    #   nothing matches
    def title(dom)
      node = dom.css(title_path).first
      node.text if node
    end

    # @param dom [#css] parsed page
    # @return the nodes matching body_path, exactly as dom.css returns them
    def body(dom)
      dom.css(body_path)
      # feels like I need .map { |n| n.text }
    end

    # Maps every node matching link_path through the block.
    #
    # @return [Array] block results; empty when link_path is nil
    def page_links(dom, &block)
      links(dom, link_path, &block)
    end

    # Maps every node matching chapter_path through the block.
    #
    # @return [Array] block results; empty when chapter_path is nil, which is
    #   its default
    def chapter_links(dom, &block)
      links(dom, chapter_path, &block)
    end

    private

    # Shared implementation of the link readers. A nil selector means "not
    # configured", so there is nothing to look up and no links to report.
    def links(dom, path, &block)
      return [] unless path
      dom.css(path).map(&block)
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # migrate to awesome spawn
2
module AudioBookCreator
  # Runs external commands through Kernel#system and logs each invocation.
  class Runner
    include Logging

    # Runs cmd with the arguments found under options[:params].
    #
    # The params are flattened twice and nils are removed before every
    # remaining element is converted to a String. A Hash therefore becomes
    # key, value, key, value, ... and a nil value leaves its key as a bare
    # argument (presumably how value-less flags are expressed; confirm with
    # the callers).
    #
    # @param cmd [String] program to execute
    # @param options [Hash] must hold :params
    # @return what Kernel#system returned: true, false or nil
    def run(cmd, options)
      args = options[:params].flatten.flatten.compact

      logger.info { "run: #{cmd} #{args.join(" ")}" }
      logger.info ""
      succeeded = system(cmd, *args.map(&:to_s))
      logger.info ""
      logger.info { succeeded ? "success" : "issue" }

      succeeded
    end

    # Same as #run, but a falsy result raises.
    #
    # @raise [RuntimeError] "trouble running command"
    def run!(cmd, options)
      outcome = run(cmd, options)
      raise("trouble running command") unless outcome
      outcome
    end
  end
end
@@ -0,0 +1,54 @@
1
# FileUtils is used below; require it here rather than relying on another
# file having loaded it first.
require 'fileutils'

module AudioBookCreator
  # Writes a chapter's text to disk and has the `say` command turn it into a
  # sound file inside the book's base directory.
  class Speaker
    attr_accessor :speaker_def
    attr_accessor :book_def

    def initialize(speaker_def, book_def)
      @speaker_def = speaker_def
      @book_def = book_def
    end

    # Makes sure the base directory exists.
    #
    # mkdir_p creates missing parent directories too and is a no-op when the
    # directory is already there, so no separate existence check is needed.
    def make_directory_structure
      FileUtils.mkdir_p(base_dir)
    end

    # Produces the text and sound files for a chapter.
    #
    # Writing and running are delegated to AudioBookCreator.optionally_write
    # and AudioBookCreator.optionally_run, which receive the target file name
    # and the regen_audio flag.
    #
    # @param chapter [#empty?, #to_s, #title, #filename]
    # @return [SpokenChapter] the chapter title with its sound file name
    # @raise [RuntimeError] "Empty chapter" when the chapter is empty
    def say(chapter)
      raise "Empty chapter" if chapter.empty?
      text_filename = chapter_text_filename(chapter)
      sound_filename = chapter_sound_filename(chapter)

      AudioBookCreator.optionally_write(text_filename, force) { chapter.to_s }
      AudioBookCreator.optionally_run(sound_filename, force) do
        ["say", params: params(text_filename, sound_filename)]
      end
      SpokenChapter.new(chapter.title, sound_filename)
    end

    # @return [String] path of the chapter's text file
    def chapter_text_filename(chapter)
      "#{base_dir}/#{chapter.filename}.txt"
    end

    # @return [String] path of the chapter's sound file
    def chapter_sound_filename(chapter)
      "#{base_dir}/#{chapter.filename}.m4a"
    end

    private

    def base_dir
      book_def.base_dir
    end

    def force
      speaker_def.regen_audio
    end

    # Arguments for `say`: voice, rate, input text file and output sound file.
    def params(text_filename, sound_filename)
      {
        "-v" => speaker_def.voice,
        "-r" => speaker_def.rate,
        "-f" => text_filename,
        "-o" => sound_filename,
      }
    end
  end
end
@@ -0,0 +1,39 @@
1
module AudioBookCreator
  # Settings for speaking chapters and for binding the resulting audio.
  class SpeakerDef
    # currently like the following voices:
    # Vicki # 10
    # Serena # 8 UK
    # Allison # ? (ok)
    # Moira # 7 Irish
    # Fiona # 5 Scottish
    # Kate # 4 UK
    # Susan # 2
    # Zosia # 0 Poland
    # Angelica # 0 Mexican?
    # Paulina # 0 Mexican
    attr_accessor :voice, :rate

    # max_hours: split on this hour mark
    attr_accessor :channels, :max_hours, :bit_rate, :sample_rate, :regen_audio

    # Assigns each option through its public writer, then fills in a default
    # for every setting that is still nil or false. regen_audio gets no
    # default.
    #
    # @param options [Hash] attribute name => value
    def initialize(options = {})
      options.each { |name, value| public_send("#{name}=", value) }
      default_speaking_settings
      default_binding_settings
    end

    private

    # for speaking the chapter
    def default_speaking_settings
      @voice ||= "Vicki"
      @rate ||= 280
    end

    # for binding the book
    def default_binding_settings
      @channels ||= 1
      @bit_rate ||= 32
      @max_hours ||= 7
      @sample_rate ||= 22_050
    end
  end
end