audio_book_creator 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.rspec +4 -0
  4. data/.travis.yml +8 -0
  5. data/Gemfile +8 -0
  6. data/README.md +60 -0
  7. data/Rakefile +8 -0
  8. data/audio_book_creator.gemspec +31 -0
  9. data/bin/audio_book_creator +6 -0
  10. data/lib/audio_book_creator.rb +59 -0
  11. data/lib/audio_book_creator/binder.rb +61 -0
  12. data/lib/audio_book_creator/book_creator.rb +31 -0
  13. data/lib/audio_book_creator/book_def.rb +36 -0
  14. data/lib/audio_book_creator/cached_hash.rb +20 -0
  15. data/lib/audio_book_creator/cascading_array.rb +57 -0
  16. data/lib/audio_book_creator/chapter.rb +33 -0
  17. data/lib/audio_book_creator/cli.rb +119 -0
  18. data/lib/audio_book_creator/conductor.rb +67 -0
  19. data/lib/audio_book_creator/editor.rb +20 -0
  20. data/lib/audio_book_creator/logging.rb +7 -0
  21. data/lib/audio_book_creator/page_db.rb +42 -0
  22. data/lib/audio_book_creator/page_def.rb +31 -0
  23. data/lib/audio_book_creator/runner.rb +22 -0
  24. data/lib/audio_book_creator/speaker.rb +54 -0
  25. data/lib/audio_book_creator/speaker_def.rb +39 -0
  26. data/lib/audio_book_creator/spider.rb +60 -0
  27. data/lib/audio_book_creator/spoken_chapter.rb +16 -0
  28. data/lib/audio_book_creator/surfer_def.rb +15 -0
  29. data/lib/audio_book_creator/url_filter.rb +33 -0
  30. data/lib/audio_book_creator/version.rb +3 -0
  31. data/lib/audio_book_creator/web.rb +44 -0
  32. data/spec/audio_book_creator/binder_spec.rb +103 -0
  33. data/spec/audio_book_creator/book_creator_spec.rb +63 -0
  34. data/spec/audio_book_creator/book_def_spec.rb +61 -0
  35. data/spec/audio_book_creator/cached_hash_spec.rb +19 -0
  36. data/spec/audio_book_creator/cascading_array_spec.rb +64 -0
  37. data/spec/audio_book_creator/chapter_spec.rb +80 -0
  38. data/spec/audio_book_creator/cli_spec.rb +274 -0
  39. data/spec/audio_book_creator/conductor_spec.rb +102 -0
  40. data/spec/audio_book_creator/editor_spec.rb +39 -0
  41. data/spec/audio_book_creator/logging_spec.rb +21 -0
  42. data/spec/audio_book_creator/page_db_spec.rb +74 -0
  43. data/spec/audio_book_creator/page_def_spec.rb +79 -0
  44. data/spec/audio_book_creator/runner_spec.rb +65 -0
  45. data/spec/audio_book_creator/speaker_def_spec.rb +39 -0
  46. data/spec/audio_book_creator/speaker_spec.rb +105 -0
  47. data/spec/audio_book_creator/spider_spec.rb +172 -0
  48. data/spec/audio_book_creator/spoken_chapter_spec.rb +30 -0
  49. data/spec/audio_book_creator/surfer_def_spec.rb +17 -0
  50. data/spec/audio_book_creator/url_filter_spec.rb +52 -0
  51. data/spec/audio_book_creator/version_spec.rb +5 -0
  52. data/spec/audio_book_creator/web_spec.rb +66 -0
  53. data/spec/audio_book_creator_spec.rb +25 -0
  54. data/spec/spec_helper.rb +106 -0
  55. data/spec/support/test_logger.rb +21 -0
  56. metadata +238 -0
@@ -0,0 +1,60 @@
1
+ require 'nokogiri'
2
+ require 'uri'
3
+
4
module AudioBookCreator
  # Walks a site starting from a list of chapter urls and collects the raw
  # contents of every page it visits.
  class Spider
    include Logging

    # @!attribute web
    #   @return Hash access to the world wide web (indexed by url string)
    attr_accessor :web

    # @!attribute invalid_urls
    #   @return an object answering +include?(uri)+; links for which it
    #     returns a truthy value are not queued
    attr_accessor :invalid_urls

    # @!attribute page_def
    #   @return an object answering +page_links(doc, &block)+ and
    #     +chapter_links(doc, &block)+
    attr_accessor :page_def

    def initialize(page_def, web = {}, invalid_urls = {})
      @page_def, @web, @invalid_urls = page_def, web, invalid_urls
    end

    # Visit every queued url until the queue is exhausted.
    #
    # @param chapters [Array<String, URI>] urls to start from
    # @return [Array] the contents of each visited page, in visit order
    def run(chapters)
      start_urls = chapters.map { |chapter| uri(chapter) }
      queue = CascadingArray.new([], start_urls)
      pages = []

      while (current = queue.shift)
        body, page_links, chapter_links = visit_page(current)
        pages << body

        # pages are queued before chapters, same as the links were collected
        page_links.each do |link|
          queue.add_unique_page(link) unless invalid_urls.include?(link)
        end
        chapter_links.each do |link|
          queue.add_unique_chapter(link) unless invalid_urls.include?(link)
        end
      end

      pages
    end

    private

    # Fetch one page and extract its outgoing links.
    # this one hangs on mutations
    #
    # @return [Array] the page contents, the page links and the chapter links
    def visit_page(url)
      logger.info { "visit #{url}" }
      page = web[url.to_s]
      doc = Nokogiri::HTML(page)

      # every href is resolved relative to the page it was found on
      resolve = proc { |anchor| uri(url, anchor["href"]) }
      page_links = page_def.page_links(doc, &resolve)
      chapter_links = page_def.chapter_links(doc, &resolve)

      [page, page_links, chapter_links]
    end

    # Build a URI without a fragment, optionally resolving +alt+ against +url+.
    #
    # raises URI::Error (BadURIError)
    def uri(url, alt = nil)
      target = url.is_a?(URI) ? url : URI.parse(url)
      target += alt if alt
      target.fragment = nil # remove #x part of url
      target
    end
  end
end
@@ -0,0 +1,16 @@
1
module AudioBookCreator
  # Value object pairing a chapter title with the audio file that was
  # produced for it.
  class SpokenChapter
    # @!attribute title
    #   @return the chapter title
    # @!attribute filename
    #   @return the name of the audio file for this chapter
    attr_accessor :title, :filename

    def initialize(title, filename)
      @title = title
      @filename = filename
    end

    # Two spoken chapters are equal when both title and filename are +eql?+.
    #
    # @param other [Object] the object to compare against
    # @return [Boolean]
    def ==(other)
      other.kind_of?(SpokenChapter) &&
        other.title.eql?(title) && other.filename.eql?(filename)
    end
    alias :eql? :==

    # Hash code consistent with #eql?, so that equal chapters are treated as
    # the same Hash key and are collapsed by Array#uniq. It is derived from
    # the mutable attributes, so it changes when they are reassigned.
    #
    # @return [Integer]
    def hash
      [title, filename].hash
    end
  end
end
@@ -0,0 +1,15 @@
1
module AudioBookCreator
  # Plain settings holder with four read/write attributes. Every attribute
  # defaults to nil when it is not passed to the constructor.
  class SurferDef
    attr_accessor :host, :max, :regen_html, :cache_filename

    # All arguments are optional and are stored as given.
    def initialize(host = nil, max = nil, regen_html = nil, cache_filename = nil)
      @host, @max = host, max
      @regen_html, @cache_filename = regen_html, cache_filename
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'uri'
2
+
3
module AudioBookCreator
  # Rejects urls that have an unexpected file extension or that live on a
  # different host than the one the filter was configured with.
  class UrlFilter
    include Logging

    # @!attribute host
    #   @return the host name links are restricted to (nil for no restriction)
    attr_reader :host

    # @param host [String, URI, nil] url whose host name links must match
    def initialize(host)
      self.host = host
    end

    # Store only the host portion of the given url. A falsy value is stored
    # unchanged.
    def host=(url)
      @host =
        if url
          parsed = url.is_a?(URI) ? url : URI.parse(url)
          parsed.host
        else
          url
        end
    end

    # Check a url against the filter.
    #
    # NOTE: an invalid url is reported by raising, not by returning true.
    #
    # @param url [URI] the link to check
    # @return [nil] when the url is acceptable
    # @raise [RuntimeError] when the extension is not an accepted one, or when
    #   a host is configured and the url points at a different host
    def include?(url)
      extension = File.extname(url.path)
      unless valid_extensions.include?(extension)
        logger.warn { "ignoring bad file extension #{url}" }
        raise "bad file extension"
      end

      if host && (host != url.host)
        logger.warn { "ignoring remote url #{url}" }
        raise "remote url #{url}"
      end
    end

    private

    # File extensions (including none at all) that are accepted.
    def valid_extensions
      ["", '.html', '.htm', '.php', '.jsp']
    end
  end
end
@@ -0,0 +1,3 @@
1
module AudioBookCreator
  # Release number of this gem. Frozen so the shared string cannot be
  # modified in place by code that reads it.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,44 @@
1
+ require 'open-uri'
2
+ require 'uri'
3
+
4
module AudioBookCreator
  # Hash-like read access to the web: +web[url]+ downloads the url and returns
  # its body. An optional limit caps how many fetches may be made.
  class Web
    include Logging

    # @!attribute max
    #   @return Integer the maximum number of pages to visit
    attr_accessor :max

    # @!attribute count
    #   @return Integer the number of pages visited
    attr_accessor :count

    # @param max [Integer, nil] page limit; nil means unlimited
    def initialize(max = nil)
      @max = max
      @count = 0
    end

    # Fetch the contents of a url.
    #
    # The request goes through the URI object's own +open+ (provided by
    # open-uri) instead of Kernel#open: Kernel#open no longer fetches urls on
    # Ruby 3.0 and later. The block form also closes the handle once the body
    # has been read.
    #
    # @param url [String, URI] the address to download
    # @return [String] the response body
    # @raise [RuntimeError] when more than +max+ pages have been requested
    def [](url)
      @count += 1
      log_page(url)
      check_limit
      URI.parse(url.to_s).open(&:read)
    end

    private

    # Log the fetch together with the running count (and the limit, if any).
    def log_page(url)
      logger.info do
        max ? "fetch #{url} [#{count}/#{max}]" : "fetch #{url} [#{count}]"
      end
    end

    # Abort once the page limit has been exceeded.
    def check_limit
      raise "visited #{max} pages" if over_limit?
    end

    # True when a limit is set and the count has gone past it.
    def over_limit?
      max && count > max
    end
  end
end
@@ -0,0 +1,103 @@
1
+ require "spec_helper"
2
+
3
# Specs for AudioBookCreator::Binder#create. Expectations on command execution
# are placed on any instance of AudioBookCreator::Runner (see expect_runner).
describe AudioBookCreator::Binder do
  let(:book_def) { AudioBookCreator::BookDef.new("title", nil, "dir", nil, false) }
  let(:speaker_def) { AudioBookCreator::SpeakerDef.new(regen_audio: false) }
  subject { described_class.new(book_def, speaker_def) }

  it "should require a chapter" do
    expect_runner.not_to receive(:system)
    # Pin the message: a bare raise_error also passes on unrelated failures
    # such as a NoMethodError.
    expect { subject.create([]) }.to raise_error("No Chapters")
  end

  it "should do nothing if m4b exists" do
    expect(File).to receive(:exist?).with("title.m4b").and_return(true)

    expect_runner.not_to receive(:system)
    subject.create([spoken_chapter])
  end

  it "should base filename on title and sanitize it" do
    book_def.title = "the title"
    expect_runner.not_to receive(:system)
    expect(File).to receive(:exist?).with("the-title.m4b").and_return(true)
    subject.create([spoken_chapter])
  end

  it "should create text and m4a file" do
    expect(File).to receive(:exist?).with("title.m4b").and_return(false)

    expect_runner.to receive(:system)
      .with("abbinder", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
            "-r", "22050", "-g", "Audiobook", "-l", "7", "-o", "title.m4b",
            "@the title@", "dir/chapter01.m4a").and_return(true)
    subject.create([spoken_chapter])
  end

  context "with itunes" do
    # the outer subject is lazy, so it picks this change up
    before { book_def.itunes = true }

    it "should load into itunes" do
      expect(File).to receive(:exist?).with("title.m4b").and_return(false)

      expect_runner.to receive(:system)
        .with("abbinder", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
              "-r", "22050", "-g", "Audiobook", "-l", "7", "-o", "title.m4b",
              "@the title@", "dir/chapter01.m4a", "-A").and_return(true)
      subject.create([spoken_chapter])
    end
  end

  it "outputs messages if set to verbose" do
    enable_logging
    expect(File).to receive(:exist?).and_return(false)

    expect_runner.to receive(:system).and_return(true)

    subject.create([spoken_chapter])
    expect_to_have_logged(/^run:/, "", "", "success")
  end

  it "outputs no messages if set to non verbose" do
    expect(File).to receive(:exist?).and_return(false)

    expect_runner.to receive(:system).and_return(true)
    subject.create([spoken_chapter])
    expect_to_have_logged
  end

  context "with force" do
    before { speaker_def.regen_audio = true }

    it "should create m4a if exists" do
      expect(File).not_to receive(:exist?)

      expect_runner.to receive(:system).and_return(true)
      subject.create([spoken_chapter])
    end
  end

  context "with false force" do
    # relies on the default speaker_def (regen_audio: false)
    it "should not create m4a if exists" do
      expect(File).to receive(:exist?).and_return(true)

      expect_runner.not_to receive(:system)
      subject.create([spoken_chapter])
    end
  end

  it "requires chapters to be passed in" do
    expect_runner.not_to receive(:system)
    expect { subject.create(nil) }.to raise_error("No Chapters")
    expect { subject.create([]) }.to raise_error("No Chapters")
  end

  private

  # Expectation target covering every Runner instance, so the spec does not
  # need a handle on the runner the binder uses.
  def expect_runner
    expect_any_instance_of(AudioBookCreator::Runner)
  end
end
@@ -0,0 +1,63 @@
1
+ require 'spec_helper'
2
+
3
# Specs for AudioBookCreator::BookCreator, driven entirely through doubles for
# its four collaborators.
describe AudioBookCreator::BookCreator do

  # this is kinda testing the implementation
  context "#run" do
    let(:spider) { double(:spider) }
    let(:editor) { double(:editor) }
    let(:speaker) { double(:speaker) }
    let(:binder) { double(:binder) }

    subject { described_class.new(spider, editor, speaker, binder) }

    it "should call all the constructors and components" do
      urls = ["http://site.com/"]
      pages = ["site.com contents"]

      first_chapter = chapter("contents1", "title1", 1)
      second_chapter = chapter("contents2", "title2", 2)
      first_spoken = spoken_chapter("title1", "dir/chapter01.m4a")
      second_spoken = spoken_chapter("title2", "dir/chapter02.m4a")

      # each stage receives what the previous stage returned
      expect(speaker).to receive(:make_directory_structure)
      expect(spider).to receive(:run).with(urls).and_return(pages)
      expect(editor).to receive(:parse).with(pages)
        .and_return([first_chapter, second_chapter])
      expect(speaker).to receive(:say).with(first_chapter).and_return(first_spoken)
      expect(speaker).to receive(:say).with(second_chapter).and_return(second_spoken)
      expect(binder).to receive(:create).with([first_spoken, second_spoken])

      subject.create(urls)
    end
  end

  # taken from cli
  # context "real object" do
  #   it "spiders the web" do
  #     # spider:
  #     expect_visit_page("http://site.com/", "<h1>title</h1>", "<p>contents</p>")
  #     # speaker:
  #     expect(File).to receive(:exist?).with("title").and_return(true)
  #     expect(File).to receive(:exist?).with("title/chapter01.txt").and_return(true)
  #     expect(File).to receive(:exist?).with("title/chapter01.m4a").and_return(true)
  #     # binder
  #     expect(File).to receive(:exist?).with("title.m4b").and_return(true)
  #     # chain parse and run to mimic bin/audio_book_creator
  #     subject.parse(%w(title http://site.com/ -v)).run
  #     expect(AudioBookCreator.logger.level).to eq(Logger::INFO)
  #   end
  # end

  # private

  # # NOTE: this uses any_instance because we don't want to instantiate anything
  # # could assign web and use a double instead
  # def expect_visit_page(url, *args)
  #   url = site(url)
  #   expect_any_instance_of(AudioBookCreator::Web).to receive(:[])
  #     .with(url).and_return(page(url, *args))
  # end
end
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
# Specs for AudioBookCreator::BookDef: constructor defaults, derived values
# and the class-level filename sanitizer.
describe AudioBookCreator::BookDef do
  context "with no parameter" do
    subject { described_class.new }

    specify { expect(subject.title).to eq(nil) }
    specify { expect(subject.author).to eq("Vicki") }
    specify { expect(subject.urls).to be_nil }
    specify { expect(subject.itunes).to be_falsy }
  end

  context "with title" do
    subject { described_class.new("dir") }

    specify { expect(subject.base_dir).to eq("dir") }
    specify { expect(subject.title).to eq("dir") }
  end

  context "with all parameters" do
    subject { described_class.new("the title", "author", "dir", %w[a b], true) }

    specify { expect(subject.base_dir).to eq("dir") }
    specify { expect(subject.title).to eq("the title") }
    specify { expect(subject.author).to eq("author") }
    specify { expect(subject.filename).to eq("the-title.m4b") }
    specify { expect(subject.urls).to eq(%w[a b]) }
    specify { expect(subject.itunes).to be_truthy }
  end

  context "with derived title" do
    subject { described_class.new("the title", "author", nil, nil) }

    specify { expect(subject.base_dir).to eq("the-title") }
  end

  context "#unique_urls" do
    subject { described_class.new("dir") }

    before do
      subject.urls = [
        "http://site.com/title",
        "http://site.com/title",
        "http://site.com/title2"
      ]
    end

    specify do
      expect(subject.unique_urls).to eq(["http://site.com/title", "http://site.com/title2"])
    end
  end

  context ".sanitize_filename" do
    subject { described_class }

    it "should join strings" do
      result = subject.sanitize_filename("title", "jpg")
      expect(result).to eq("title.jpg")
    end

    it "should handle arrays" do
      result = subject.sanitize_filename(["title", "jpg"])
      expect(result).to eq("title.jpg")
    end

    it "should ignore nils" do
      result = subject.sanitize_filename("title", nil)
      expect(result).to eq("title")
    end

    it "should support titles with spaces" do
      messy_title = 'title ((for "you", "Amy", and "John"))'
      expect(subject.sanitize_filename(messy_title)).to eq("title-for-you-Amy-and-John")
    end

    it "should support titles with extra stuff" do
      result = subject.sanitize_filename("title,for!")
      expect(result).to eq("title-for")
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'spec_helper'
2
+
3
# Specs for AudioBookCreator::CachedHash, built from a cache hash and a main
# hash (in that order).
describe AudioBookCreator::CachedHash do
  let(:cache) { Hash.new }
  let(:main) { Hash.new }
  subject { described_class.new(cache, main) }

  context "#with cached content" do
    let(:cache) { { key: "val" } }

    specify { expect(subject[:key]).to eq("val") }

    # a cache hit leaves the main hash untouched
    specify do
      subject[:key]
      expect(main[:key]).to be_nil
    end
  end

  context "#with main content" do
    let(:main) { { key: "val" } }

    specify { expect(subject[:key]).to eq("val") }

    # a lookup served by main is copied into the cache
    specify do
      subject[:key]
      expect(cache[:key]).to eq("val")
    end
  end
end
@@ -0,0 +1,64 @@
1
+ require 'spec_helper'
2
+
3
# Specs for AudioBookCreator::CascadingArray, a queue built from a list of
# pages and a list of chapters.
describe AudioBookCreator::CascadingArray do
  let(:pages) { [:p1, :p2] }
  let(:chapters) { [:ch1, :ch2] }
  subject { described_class.new(pages, chapters) }

  specify { is_expected.to be_include(:p1) }
  specify { is_expected.to be_include(:ch2) }
  specify { is_expected.not_to be_include(:ch4) }

  it "includes even after it is empty" do
    # drain all four entries first
    4.times { subject.shift }

    expect(subject.shift).to be_nil
    expect(subject).to be_include(:p1)
    expect(subject).to be_include(:ch1)
  end

  it "includes later added values" do
    subject.add_page(:p3)
    subject.add_chapter(:c3)

    expect(subject.each.to_a).to eq([:p1, :p2, :p3, :ch1, :ch2, :c3])
    expect(subject).to be_include(:p3)
    expect(subject).to be_include(:c3)
  end

  it "takes primary before secondary" do
    expect(subject.shift).to eq(:p1)
    expect(subject.shift).to eq(:p2)
    expect(subject.shift).to eq(:ch1)
    expect(subject.shift).to eq(:ch2)
    expect(subject.shift).to be_nil
  end

  it "enumerates" do
    seen = []
    subject.each do |item|
      seen.push(item)
    end
    expect(seen).to eq([:p1, :p2, :ch1, :ch2])
  end

  it "non block enumerates" do
    enumerator = subject.each
    expect(enumerator).to be_a(Enumerator)
    expect(subject.each.to_a).to eq([:p1, :p2, :ch1, :ch2])
  end

  it "puts pages into primary" do
    subject.add_page(:p3)
    expect(subject.each.to_a).to eq([:p1, :p2, :p3, :ch1, :ch2])
  end

  it "puts non duplicate pages into primary" do
    2.times { subject.add_unique_page(:p3) }
    expect(subject.each.to_a).to eq([:p1, :p2, :p3, :ch1, :ch2])
  end

  it "puts non duplicate chapters into secondary" do
    2.times { subject.add_unique_chapter(:ch3) }
    expect(subject.each.to_a).to eq([:p1, :p2, :ch1, :ch2, :ch3])
  end
end