audio_book_creator 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.rspec +4 -0
  4. data/.travis.yml +8 -0
  5. data/Gemfile +8 -0
  6. data/README.md +60 -0
  7. data/Rakefile +8 -0
  8. data/audio_book_creator.gemspec +31 -0
  9. data/bin/audio_book_creator +6 -0
  10. data/lib/audio_book_creator.rb +59 -0
  11. data/lib/audio_book_creator/binder.rb +61 -0
  12. data/lib/audio_book_creator/book_creator.rb +31 -0
  13. data/lib/audio_book_creator/book_def.rb +36 -0
  14. data/lib/audio_book_creator/cached_hash.rb +20 -0
  15. data/lib/audio_book_creator/cascading_array.rb +57 -0
  16. data/lib/audio_book_creator/chapter.rb +33 -0
  17. data/lib/audio_book_creator/cli.rb +119 -0
  18. data/lib/audio_book_creator/conductor.rb +67 -0
  19. data/lib/audio_book_creator/editor.rb +20 -0
  20. data/lib/audio_book_creator/logging.rb +7 -0
  21. data/lib/audio_book_creator/page_db.rb +42 -0
  22. data/lib/audio_book_creator/page_def.rb +31 -0
  23. data/lib/audio_book_creator/runner.rb +22 -0
  24. data/lib/audio_book_creator/speaker.rb +54 -0
  25. data/lib/audio_book_creator/speaker_def.rb +39 -0
  26. data/lib/audio_book_creator/spider.rb +60 -0
  27. data/lib/audio_book_creator/spoken_chapter.rb +16 -0
  28. data/lib/audio_book_creator/surfer_def.rb +15 -0
  29. data/lib/audio_book_creator/url_filter.rb +33 -0
  30. data/lib/audio_book_creator/version.rb +3 -0
  31. data/lib/audio_book_creator/web.rb +44 -0
  32. data/spec/audio_book_creator/binder_spec.rb +103 -0
  33. data/spec/audio_book_creator/book_creator_spec.rb +63 -0
  34. data/spec/audio_book_creator/book_def_spec.rb +61 -0
  35. data/spec/audio_book_creator/cached_hash_spec.rb +19 -0
  36. data/spec/audio_book_creator/cascading_array_spec.rb +64 -0
  37. data/spec/audio_book_creator/chapter_spec.rb +80 -0
  38. data/spec/audio_book_creator/cli_spec.rb +274 -0
  39. data/spec/audio_book_creator/conductor_spec.rb +102 -0
  40. data/spec/audio_book_creator/editor_spec.rb +39 -0
  41. data/spec/audio_book_creator/logging_spec.rb +21 -0
  42. data/spec/audio_book_creator/page_db_spec.rb +74 -0
  43. data/spec/audio_book_creator/page_def_spec.rb +79 -0
  44. data/spec/audio_book_creator/runner_spec.rb +65 -0
  45. data/spec/audio_book_creator/speaker_def_spec.rb +39 -0
  46. data/spec/audio_book_creator/speaker_spec.rb +105 -0
  47. data/spec/audio_book_creator/spider_spec.rb +172 -0
  48. data/spec/audio_book_creator/spoken_chapter_spec.rb +30 -0
  49. data/spec/audio_book_creator/surfer_def_spec.rb +17 -0
  50. data/spec/audio_book_creator/url_filter_spec.rb +52 -0
  51. data/spec/audio_book_creator/version_spec.rb +5 -0
  52. data/spec/audio_book_creator/web_spec.rb +66 -0
  53. data/spec/audio_book_creator_spec.rb +25 -0
  54. data/spec/spec_helper.rb +106 -0
  55. data/spec/support/test_logger.rb +21 -0
  56. metadata +238 -0
@@ -0,0 +1,60 @@
1
+ require 'nokogiri'
2
+ require 'uri'
3
+
4
module AudioBookCreator
  # Walks a set of starting urls, following the links that +page_def+ reports
  # for every fetched page, and collects the raw contents of each page visited.
  class Spider
    include Logging

    # @!attribute web
    #   @return Hash access to the world wide web
    attr_accessor :web

    # @!attribute invalid_urls
    #   @return [#include?] a url is skipped when +include?(url)+ is truthy
    attr_accessor :invalid_urls

    # @!attribute page_def
    #   @return [#page_links, #chapter_links] link extractor for a parsed page
    attr_accessor :page_def

    # @param page_def [#page_links, #chapter_links] link extractor
    # @param web [#[]] maps a url string to the page contents
    # @param invalid_urls [#include?] urls that must not be queued
    def initialize(page_def, web = {}, invalid_urls = {})
      @page_def = page_def
      @web = web
      @invalid_urls = invalid_urls
    end

    # Visit every outstanding url until the queue is drained.
    #
    # @param chapters [Enumerable<String, URI>] urls to start from
    # @return [Array] contents of each visited page, in visit order
    # @raise [URI::Error] when a starting url can not be parsed
    def run(chapters)
      outstanding = CascadingArray.new([], chapters.map { |o| uri(o) })
      visited = []

      while (url = outstanding.shift)
        contents, new_pages, new_chapters = visit_page(url)
        visited << contents
        new_pages.each do |href|
          outstanding.add_unique_page(href) unless invalid_urls.include?(href)
        end
        new_chapters.each do |href|
          outstanding.add_unique_chapter(href) unless invalid_urls.include?(href)
        end
      end
      visited
    end

    private

    # this one hangs on mutations
    #
    # Fetch +url+ through +web+ and parse it with Nokogiri.
    #
    # @return [Array] three elements: the raw page, then whatever
    #   +page_def.page_links+ and +page_def.chapter_links+ return for the parsed
    #   document. Each link block resolves the anchor's href against +url+.
    def visit_page(url)
      logger.info { "visit #{url}" }
      page = web[url.to_s]
      doc = Nokogiri::HTML(page)
      [
        page,
        page_def.page_links(doc) { |a| uri(url, a["href"]) },
        page_def.chapter_links(doc) { |a| uri(url, a["href"]) }
      ]
    end

    # Build an absolute URI without its fragment.
    #
    # @param url [String, URI] base url
    # @param alt [String, nil] optional reference resolved against +url+
    # @return [URI] a URI object that is never the caller's own instance
    # raises URI::Error (BadURIError)
    def uri(url, alt = nil)
      # dup so that clearing the fragment below never edits the caller's URI
      url = url.is_a?(URI) ? url.dup : URI.parse(url)
      url += alt if alt
      url.fragment = nil # remove #x part of url
      url
    end
  end
end
@@ -0,0 +1,16 @@
1
module AudioBookCreator
  # Value object pairing a chapter title with a filename.
  class SpokenChapter
    attr_accessor :title, :filename

    # @param title [Object] chapter title
    # @param filename [Object] filename associated with the chapter
    def initialize(title, filename)
      @title = title
      @filename = filename
    end

    # Two spoken chapters are equal when both title and filename are +eql?+.
    #
    # @param other [Object]
    # @return [Boolean]
    def ==(other)
      other.kind_of?(SpokenChapter) &&
        other.title.eql?(title) && other.filename.eql?(filename)
    end
    alias :eql? :==

    # Keeps +hash+ consistent with +eql?+ so that equal chapters collapse in
    # Hash keys, +Array#uniq+ and Set. The class is left out of the digest
    # because +==+ also accepts subclasses.
    #
    # @return [Integer]
    def hash
      [title, filename].hash
    end
  end
end
@@ -0,0 +1,15 @@
1
module AudioBookCreator
  # Plain holder for four settings: host, max, regen_html and cache_filename.
  # Every setting is optional and is nil unless supplied.
  class SurferDef
    attr_accessor :host, :max, :regen_html, :cache_filename

    # All arguments are stored as given, without validation.
    def initialize(host = nil, max = nil, regen_html = nil, cache_filename = nil)
      @host, @max = host, max
      @regen_html, @cache_filename = regen_html, cache_filename
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'uri'
2
+
3
module AudioBookCreator
  # Rejects urls that have an unexpected file extension or that point at a
  # different host than the configured one.
  class UrlFilter
    include Logging

    # Only the reader is generated; the writer below is hand written.
    # @return [String, nil] host name that urls must match, nil for any host
    attr_reader :host

    # @param host [String, URI, nil] url whose host name urls must match
    def initialize(host)
      self.host = host
    end

    # Stores only the host part of the given url.
    #
    # @param url [String, URI, nil] nil (or false) is stored unchanged, which
    #   disables the host check
    def host=(url)
      @host = url && (url.is_a?(URI) ? url : URI.parse(url)).host
    end

    # Check a url against the filter.
    #
    # NOTE: despite the predicate name this never returns true. It raises for
    # an invalid url and returns nil for an acceptable one.
    #
    # @param url [URI] url to check (must respond to +path+ and +host+)
    # @return [nil] when the url is acceptable
    # @raise [RuntimeError] "bad file extension" or "remote url ..."
    def include?(url)
      # path is nil for opaque urls such as mailto:, so coerce before extname
      if !valid_extensions.include?(File.extname(url.path.to_s))
        logger.warn { "ignoring bad file extension #{url}" }
        raise "bad file extension"
      elsif host && (host != url.host)
        logger.warn { "ignoring remote url #{url}" }
        raise "remote url #{url}"
      end
    end

    private

    # @return [Array<String>] accepted extensions; "" means no extension
    def valid_extensions
      ["", '.html', '.htm', '.php', '.jsp']
    end
  end
end
@@ -0,0 +1,3 @@
1
module AudioBookCreator
  # Gem release version. Frozen so the shared string can not be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,44 @@
1
+ require 'open-uri'
2
+ require 'uri'
3
+
4
module AudioBookCreator
  # Hash-like reader for remote pages that counts fetches and can cap them.
  class Web
    include Logging

    # @!attribute max
    #   @return Integer the maximum number of pages to visit
    attr_accessor :max

    # @!attribute count
    #   @return Integer the number of pages visited
    attr_accessor :count

    # @param max [Integer, nil] page limit, nil for no limit
    def initialize(max = nil)
      @max = max
      @count = 0
    end

    # Fetch the contents of a url.
    #
    # The counter is bumped and the limit is checked before the request is
    # made, so the fetch that would exceed +max+ never hits the network.
    #
    # The url is opened through its URI object (open-uri's OpenURI::OpenRead)
    # instead of Kernel#open: Kernel#open treats a leading "|" as a command to
    # run and no longer opens urls on Ruby 3.0+. The block form also closes the
    # underlying handle once the body has been read.
    #
    # @param url [String, URI] an url open-uri can open (http, https, ftp)
    # @return [String] body of the page
    # @raise [RuntimeError] when more than +max+ pages were requested
    def [](url)
      @count += 1
      log_page(url)
      check_limit
      URI.parse(url.to_s).open(&:read)
    end

    private

    # Log the fetch along with the running count (and the limit when set).
    def log_page(url)
      logger.info do
        max ? "fetch #{url} [#{count}/#{max}]" : "fetch #{url} [#{count}]"
      end
    end

    # @raise [RuntimeError] when the page limit has been exceeded
    def check_limit
      raise "visited #{max} pages" if over_limit?
    end

    # @return [Boolean, nil] truthy when a limit is set and count exceeds it
    def over_limit?
      max && count > max
    end
  end
end
@@ -0,0 +1,103 @@
1
+ require "spec_helper"
2
+
3
# Specs for Binder#create. Runner#system is stubbed on any instance, so no
# external command is ever executed by these examples.
describe AudioBookCreator::Binder do
  let(:book_def) { AudioBookCreator::BookDef.new("title", nil, "dir", nil, false) }
  let(:speaker_def) { AudioBookCreator::SpeakerDef.new(regen_audio: false) }
  # subject is lazy: nested contexts tweak book_def / speaker_def in a +before+
  # and still share this single definition.
  subject { described_class.new(book_def, speaker_def) }

  it "should require a chapter" do
    expect_runner.not_to receive(:system)
    # pin the message: a bare raise_error also passes on e.g. NoMethodError
    expect { subject.create([]) }.to raise_error("No Chapters")
  end

  it "should do nothing if m4b exists" do
    expect(File).to receive(:exist?).with("title.m4b").and_return(true)

    expect_runner.not_to receive(:system)
    subject.create([spoken_chapter])
  end

  it "should base filename on title and sanitize it" do
    book_def.title = "the title"
    expect_runner.not_to receive(:system)
    expect(File).to receive(:exist?).with("the-title.m4b").and_return(true)
    subject.create([spoken_chapter])
  end

  it "should create text and m4a file" do
    expect(File).to receive(:exist?).with("title.m4b").and_return(false)

    expect_runner.to receive(:system)
      .with("abbinder", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
            "-r", "22050", "-g", "Audiobook", "-l", "7", "-o", "title.m4b",
            "@the title@", "dir/chapter01.m4a").and_return(true)
    subject.create([spoken_chapter])
  end

  context "with itunes" do
    before { book_def.itunes = true }

    it "should load into itunes" do
      expect(File).to receive(:exist?).with("title.m4b").and_return(false)

      # same command line as above plus the trailing "-A" flag
      expect_runner.to receive(:system)
        .with("abbinder", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
              "-r", "22050", "-g", "Audiobook", "-l", "7", "-o", "title.m4b",
              "@the title@", "dir/chapter01.m4a", "-A").and_return(true)
      subject.create([spoken_chapter])
    end
  end

  it "outputs messages if set to verbose" do
    enable_logging
    expect(File).to receive(:exist?).and_return(false)

    expect_runner.to receive(:system).and_return(true)

    subject.create([spoken_chapter])
    expect_to_have_logged(/^run:/, "", "", "success")
  end

  it "outputs no messages if set to non verbose" do
    expect(File).to receive(:exist?).and_return(false)

    expect_runner.to receive(:system).and_return(true)
    subject.create([spoken_chapter])
    expect_to_have_logged
  end

  context "with force" do
    before { speaker_def.regen_audio = true }

    it "should create m4a if exists" do
      expect(File).not_to receive(:exist?)

      expect_runner.to receive(:system).and_return(true)
      subject.create([spoken_chapter])
    end
  end

  context "with false force" do
    it "should not create m4a if exists" do
      expect(File).to receive(:exist?).and_return(true)

      expect_runner.not_to receive(:system)
      subject.create([spoken_chapter])
    end
  end

  it "requires chapters to be passed in" do
    expect_runner.not_to receive(:system)
    expect { subject.create(nil) }.to raise_error("No Chapters")
    expect { subject.create([]) }.to raise_error("No Chapters")
  end

  private

  # Expectation target covering every Runner instance.
  def expect_runner
    expect_any_instance_of(AudioBookCreator::Runner)
  end
end
@@ -0,0 +1,63 @@
1
+ require 'spec_helper'
2
+
3
# Verifies that BookCreator#create hands data from one collaborator to the
# next. Every collaborator is a double.
describe AudioBookCreator::BookCreator do

  # this is kinda testing the implementation
  context "#run" do
    let(:spider) { double(:spider) }
    let(:editor) { double(:editor) }
    let(:speaker) { double(:speaker) }
    let(:binder) { double(:binder) }

    subject { described_class.new(spider, editor, speaker, binder) }

    it "should call all the constructors and components" do
      start_urls = ["http://site.com/"]
      raw_pages = ["site.com contents"]

      first_chapter = chapter("contents1", "title1", 1)
      last_chapter = chapter("contents2", "title2", 2)
      parsed = [first_chapter, last_chapter]

      first_spoken = spoken_chapter("title1", "dir/chapter01.m4a")
      last_spoken = spoken_chapter("title2", "dir/chapter02.m4a")
      spoken = [first_spoken, last_spoken]

      # each stage receives exactly what the previous stage returned
      expect(speaker).to receive(:make_directory_structure)
      expect(spider).to receive(:run).with(start_urls).and_return(raw_pages)
      expect(editor).to receive(:parse).with(raw_pages).and_return(parsed)
      expect(speaker).to receive(:say).with(first_chapter).and_return(first_spoken)
      expect(speaker).to receive(:say).with(last_chapter).and_return(last_spoken)
      expect(binder).to receive(:create).with(spoken)

      subject.create(start_urls)
    end
  end

  # taken from cli
  # context "real object" do
  #   it "spiders the web" do
  #     # spider:
  #     expect_visit_page("http://site.com/", "<h1>title</h1>", "<p>contents</p>")
  #     # speaker:
  #     expect(File).to receive(:exist?).with("title").and_return(true)
  #     expect(File).to receive(:exist?).with("title/chapter01.txt").and_return(true)
  #     expect(File).to receive(:exist?).with("title/chapter01.m4a").and_return(true)
  #     # binder
  #     expect(File).to receive(:exist?).with("title.m4b").and_return(true)
  #     # chain parse and run to mimic bin/audio_book_creator
  #     subject.parse(%w(title http://site.com/ -v)).run
  #     expect(AudioBookCreator.logger.level).to eq(Logger::INFO)
  #   end
  # end

  # private

  # # NOTE: this uses any_instance because we don't want to instantiate anything
  # # could assign web and use a double instead
  # def expect_visit_page(url, *args)
  #   url = site(url)
  #   expect_any_instance_of(AudioBookCreator::Web).to receive(:[])
  #     .with(url).and_return(page(url, *args))
  # end
end
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
# Covers BookDef construction defaults, derived attributes and the
# sanitize_filename class helper.
describe AudioBookCreator::BookDef do
  context "with no parameter" do
    subject { described_class.new }

    it { expect(subject.title).to eq(nil) }
    it { expect(subject.author).to eq("Vicki") }
    it { expect(subject.urls).to be_nil }
    it { expect(subject.itunes).to be_falsy }
  end

  context "with title" do
    subject { described_class.new("dir") }

    # base_dir is not passed in, it is expected to match the title
    it { expect(subject.base_dir).to eq("dir") }
    it { expect(subject.title).to eq("dir") }
  end

  context "with all parameters" do
    subject { described_class.new("the title", "author", "dir", ["a", "b"], true) }

    it { expect(subject.base_dir).to eq("dir") }
    it { expect(subject.title).to eq("the title") }
    it { expect(subject.author).to eq("author") }
    it { expect(subject.filename).to eq("the-title.m4b") }
    it { expect(subject.urls).to eq(["a", "b"]) }
    it { expect(subject.itunes).to be_truthy }
  end

  context "with derived title" do
    subject { described_class.new("the title", "author", nil, nil) }

    it { expect(subject.base_dir).to eq("the-title") }
  end

  context "#unique_urls" do
    subject { described_class.new("dir") }

    before do
      subject.urls = ["http://site.com/title", "http://site.com/title", "http://site.com/title2"]
    end

    it { expect(subject.unique_urls).to eq(["http://site.com/title", "http://site.com/title2"]) }
  end

  context ".sanitize_filename" do
    subject { described_class }

    it "should join strings" do
      sanitized = subject.sanitize_filename("title", "jpg")
      expect(sanitized).to eq("title.jpg")
    end

    it "should handle arrays" do
      sanitized = subject.sanitize_filename(["title", "jpg"])
      expect(sanitized).to eq("title.jpg")
    end

    it "should ignore nils" do
      sanitized = subject.sanitize_filename("title", nil)
      expect(sanitized).to eq("title")
    end

    it "should support titles with spaces" do
      raw_title = 'title ((for "you", "Amy", and "John"))'
      expect(subject.sanitize_filename(raw_title)).to eq("title-for-you-Amy-and-John")
    end

    it "should support titles with extra stuff" do
      sanitized = subject.sanitize_filename("title,for!")
      expect(sanitized).to eq("title-for")
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'spec_helper'
2
+
3
# CachedHash is built from two hash-like stores, a cache and a main source.
# These examples check which store answers a lookup and what gets copied.
describe AudioBookCreator::CachedHash do
  let(:cache) { Hash.new }
  let(:main) { Hash.new }
  subject { described_class.new(cache, main) }

  context "#with cached content" do
    let(:cache) { { key: "val" } }

    it { expect(subject[:key]).to eq("val") }

    # a cache hit must leave the main store untouched
    it do
      subject[:key]
      expect(main[:key]).to be_nil
    end
  end

  context "#with main content" do
    let(:main) { { key: "val" } }

    it { expect(subject[:key]).to eq("val") }

    # a value read from main is expected to land in the cache
    it do
      subject[:key]
      expect(cache[:key]).to eq("val")
    end
  end
end
@@ -0,0 +1,64 @@
1
+ require 'spec_helper'
2
+
3
# CascadingArray is built from a list of pages and a list of chapters. These
# examples pin ordering (pages before chapters), membership and the unique adds.
describe AudioBookCreator::CascadingArray do
  let(:pages) { [:p1, :p2] }
  let(:chapters) { [:ch1, :ch2] }
  subject { described_class.new(pages, chapters) }

  it { is_expected.to be_include(:p1) }
  it { is_expected.to be_include(:ch2) }
  it { is_expected.not_to be_include(:ch4) }

  it "includes even after it is empty" do
    # drain all four entries first
    4.times { subject.shift }
    expect(subject.shift).to be_nil
    [:p1, :ch1].each do |seen|
      expect(subject).to be_include(seen)
    end
  end

  it "includes later added values" do
    subject.add_page(:p3)
    subject.add_chapter(:c3)
    expect(subject.each.to_a).to eq([:p1, :p2, :p3, :ch1, :ch2, :c3])
    [:p3, :c3].each do |added|
      expect(subject).to be_include(added)
    end
  end

  it "takes primary before secondary" do
    [:p1, :p2, :ch1, :ch2].each do |expected|
      expect(subject.shift).to eq(expected)
    end
    expect(subject.shift).to be_nil
  end

  it "enumerates" do
    collected = []
    subject.each { |entry| collected.push(entry) }
    expect(collected).to eq([:p1, :p2, :ch1, :ch2])
  end

  it "non block enumerates" do
    enumerator = subject.each
    expect(enumerator).to be_a(Enumerator)
    expect(subject.each.to_a).to eq([:p1, :p2, :ch1, :ch2])
  end

  it "puts pages into primary" do
    subject.add_page(:p3)
    expect(subject.each.to_a).to eq([:p1, :p2, :p3, :ch1, :ch2])
  end

  it "puts non duplicate pages into primary" do
    2.times { subject.add_unique_page(:p3) }
    expect(subject.each.to_a).to eq([:p1, :p2, :p3, :ch1, :ch2])
  end

  it "puts non duplicate chapters into secondary" do
    2.times { subject.add_unique_chapter(:ch3) }
    expect(subject.each.to_a).to eq([:p1, :p2, :ch1, :ch2, :ch3])
  end
end