audio_book_creator 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.rspec +4 -0
  4. data/.travis.yml +8 -0
  5. data/Gemfile +8 -0
  6. data/README.md +60 -0
  7. data/Rakefile +8 -0
  8. data/audio_book_creator.gemspec +31 -0
  9. data/bin/audio_book_creator +6 -0
  10. data/lib/audio_book_creator.rb +59 -0
  11. data/lib/audio_book_creator/binder.rb +61 -0
  12. data/lib/audio_book_creator/book_creator.rb +31 -0
  13. data/lib/audio_book_creator/book_def.rb +36 -0
  14. data/lib/audio_book_creator/cached_hash.rb +20 -0
  15. data/lib/audio_book_creator/cascading_array.rb +57 -0
  16. data/lib/audio_book_creator/chapter.rb +33 -0
  17. data/lib/audio_book_creator/cli.rb +119 -0
  18. data/lib/audio_book_creator/conductor.rb +67 -0
  19. data/lib/audio_book_creator/editor.rb +20 -0
  20. data/lib/audio_book_creator/logging.rb +7 -0
  21. data/lib/audio_book_creator/page_db.rb +42 -0
  22. data/lib/audio_book_creator/page_def.rb +31 -0
  23. data/lib/audio_book_creator/runner.rb +22 -0
  24. data/lib/audio_book_creator/speaker.rb +54 -0
  25. data/lib/audio_book_creator/speaker_def.rb +39 -0
  26. data/lib/audio_book_creator/spider.rb +60 -0
  27. data/lib/audio_book_creator/spoken_chapter.rb +16 -0
  28. data/lib/audio_book_creator/surfer_def.rb +15 -0
  29. data/lib/audio_book_creator/url_filter.rb +33 -0
  30. data/lib/audio_book_creator/version.rb +3 -0
  31. data/lib/audio_book_creator/web.rb +44 -0
  32. data/spec/audio_book_creator/binder_spec.rb +103 -0
  33. data/spec/audio_book_creator/book_creator_spec.rb +63 -0
  34. data/spec/audio_book_creator/book_def_spec.rb +61 -0
  35. data/spec/audio_book_creator/cached_hash_spec.rb +19 -0
  36. data/spec/audio_book_creator/cascading_array_spec.rb +64 -0
  37. data/spec/audio_book_creator/chapter_spec.rb +80 -0
  38. data/spec/audio_book_creator/cli_spec.rb +274 -0
  39. data/spec/audio_book_creator/conductor_spec.rb +102 -0
  40. data/spec/audio_book_creator/editor_spec.rb +39 -0
  41. data/spec/audio_book_creator/logging_spec.rb +21 -0
  42. data/spec/audio_book_creator/page_db_spec.rb +74 -0
  43. data/spec/audio_book_creator/page_def_spec.rb +79 -0
  44. data/spec/audio_book_creator/runner_spec.rb +65 -0
  45. data/spec/audio_book_creator/speaker_def_spec.rb +39 -0
  46. data/spec/audio_book_creator/speaker_spec.rb +105 -0
  47. data/spec/audio_book_creator/spider_spec.rb +172 -0
  48. data/spec/audio_book_creator/spoken_chapter_spec.rb +30 -0
  49. data/spec/audio_book_creator/surfer_def_spec.rb +17 -0
  50. data/spec/audio_book_creator/url_filter_spec.rb +52 -0
  51. data/spec/audio_book_creator/version_spec.rb +5 -0
  52. data/spec/audio_book_creator/web_spec.rb +66 -0
  53. data/spec/audio_book_creator_spec.rb +25 -0
  54. data/spec/spec_helper.rb +106 -0
  55. data/spec/support/test_logger.rb +21 -0
  56. metadata +238 -0
@@ -0,0 +1,39 @@
1
+ require 'spec_helper'
2
+
3
# Specs for AudioBookCreator::Editor#parse: a list of pages goes in, a list
# of chapters is expected back. The `page` and `chapter` builders are not
# defined in this file (presumably spec_helper supplies them).
describe AudioBookCreator::Editor do
  let(:page_def) { AudioBookCreator::PageDef.new('h1', 'p') }
  let(:chapter1) { chapter("first\n\nsecond", 'the title') }

  subject { described_class.new(page_def) }

  it 'should generate a page' do
    pages = [page('page1', '<h1>the title</h1>', '<p>first</p>', '<p>second</p>')]

    expect(subject.parse(pages)).to eq([chapter1])
  end

  it 'should respect content path' do
    page_def.title_path = 'h3'
    page_def.body_path  = '#story p'
    # the trailing <p> sits outside #story, so it must not show up in the chapter
    pages = [
      page('page1', '<h3>the title</h3>',
           "<div id='story'>", '<p>first</p>', '<p>second</p>', '</div>',
           '<p>bad</p>')
    ]

    expect(subject.parse(pages)).to eq([chapter1])
  end

  it 'should ignore body formatting' do
    # inline markup (<a>, <b>) inside the paragraphs is expected to be dropped
    pages = [
      page('page1', '<h1>the title</h1>',
           "<p><a href='#this'>first</a></p>", '<p><b>second</b></p>')
    ]

    expect(subject.parse(pages)).to eq([chapter1])
  end

  it 'should parse multiple pages' do
    pages = [
      page('page1', '<h1>p1</h1>', '<p>first</p>'),
      page('page2', '<h1>p2</h1>', '<p>second</p>')
    ]

    expect(subject.parse(pages)).to eq([chapter('first', 'p1', 1), chapter('second', 'p2', 2)])
  end

  it 'should default the title if none found' do
    # neither page carries an <h1>, so titles fall back to "Chapter N"
    pages = [
      page('page1', '<p>first</p>'),
      page('page2', '<p>second</p>')
    ]

    expect(subject.parse(pages)).to eq([chapter('first', 'Chapter 1', 1), chapter('second', 'Chapter 2', 2)])
  end
end
@@ -0,0 +1,21 @@
1
+ require "spec_helper"
2
+
3
# Specs for the AudioBookCreator::Logging mixin, exercised through an
# anonymous class that includes it. `enable_logging` and
# `expect_to_have_logged` are helpers that are not defined in this file.
describe AudioBookCreator::Logging do
  subject do
    host_class = Class.new
    host_class.send(:include, described_class)
    host_class.new
  end

  it 'should not log strings when verbose is off' do
    subject.logger.info('phrase')
    expect_to_have_logged
  end

  it 'should log strings' do
    enable_logging
    subject.logger.info('phrase')
    expect_to_have_logged('phrase')
  end

  it 'should log blocks' do
    enable_logging
    subject.logger.info { 'phrase' }
    expect_to_have_logged('phrase')
  end
end
@@ -0,0 +1,74 @@
1
+ require "spec_helper"
2
+ require 'tempfile'
3
+
4
# Specs for AudioBookCreator::PageDb, a key/value page cache. Unless a
# context says otherwise, the subject is opened on ":memory:".
describe AudioBookCreator::PageDb do
  subject { described_class.new(':memory:') }

  # all of these tests are in memory
  # this is the only test that depends upon it
  context 'with memory databases' do
    it 'does not create a file' do
      # access key to trigger database creation
      subject['key']
      expect(File).not_to be_exist(':memory:')
    end
  end

  it 'works' do
    expect(subject).not_to be_nil
  end

  it 'creates cache value' do
    subject['key'] = 'value'

    expect(subject['key']).to eq('value')
  end

  it 'include good key' do
    subject['key'] = 'value'
    expect(subject).to include('key')
  end

  it 'doesnt include bad key' do
    expect(subject).not_to include('key')
  end

  context 'with prepopulated (file) database' do
    let(:tmp) { Tempfile.new('db') }

    # seed the file through one instance; the examples open a second one
    before do
      described_class.new(tmp.path)['key'] = 'value'
    end

    after do
      tmp.close
      tmp.unlink
    end

    it 'finds entry in previously created cache' do
      reopened = described_class.new(tmp.path)
      expect(reopened['key']).to eq('value')
    end

    # NOTE(review): Tempfile.new already creates the file on disk, so this
    # example passes regardless of what PageDb does - consider a fresh path.
    it 'creates a file' do
      expect(File.exist?(tmp.path)).to be_truthy
    end
  end

  it 'handles url keys' do
    url      = 'http://the.web.site.com/path/to/cgi?param1=x&param2=y#substuff'
    contents = 'a' * 555
    subject[url] = contents
    expect(subject[url]).to eq(contents)
  end

  it 'supports enumerable (map)' do
    subject['keyc'] = 'v'
    subject['keya'] = 'v'
    subject['keyz'] = 'v'

    # entries are expected back in insertion order, not sorted by key
    pairs = subject.map { |(key, value)| "#{key}:#{value}" }
    expect(pairs).to eq(%w[keyc:v keya:v keyz:v])
  end
end
@@ -0,0 +1,79 @@
1
+ require "spec_helper"
2
+
3
# Specs for AudioBookCreator::PageDef: the selector defaults, explicit
# selectors, and the title/body/link extractors run against a parsed page.
# `dom` is a helper that is not defined in this file. Groups that do not
# declare a subject rely on RSpec's implicit `described_class.new`.
describe AudioBookCreator::PageDef do
  context 'with no parameter' do
    subject { described_class.new }

    it { expect(subject.title_path).to eq('h1') }
    it { expect(subject.body_path).to eq('p') }
    it { expect(subject.link_path).to eq('a') }
    it { expect(subject.chapter_path).to be_nil }
  end

  context 'with all parameters' do
    subject { described_class.new('h1.title', 'div', 'a.link', 'a.chapter') }

    it { expect(subject.title_path).to eq('h1.title') }
    it { expect(subject.body_path).to eq('div') }
    it { expect(subject.link_path).to eq('a.link') }
    it { expect(subject.chapter_path).to eq('a.chapter') }
  end

  describe '#title' do
    context 'with no title' do
      let(:page) { dom('<p></p>') }

      it { expect(subject.title(page)).to be_nil }
    end

    context 'with title' do
      let(:page) { dom('<h1>title</h1>') }

      it { expect(subject.title(page)).to eq('title') }
    end
  end

  # NOTE: chapter uses array.join
  describe '#body' do
    context 'with no body' do
      let(:page) { dom('<h1></h1>') }

      it { expect(subject.body(page)).to be_empty }
    end

    context 'with body' do
      let(:page) { dom('<p>p1</p>') }

      it { expect(Array(subject.body(page)).join).to eq('p1') }
    end

    context 'with many bodies' do
      let(:page) { dom('<p>p1</p><p>p2</p><p>p3</p><p>p4</p>') }

      it { expect(Array(subject.body(page)).join).to eq(%w[p1 p2 p3 p4].join) }
    end
  end

  describe '#page_links' do
    # evaluated lazily inside each example, against that context's page
    let(:hrefs) { subject.page_links(page) { |anchor| anchor['href'] } }

    context 'with no page_links' do
      let(:page) { dom('<p></p>') }

      it { expect(hrefs).to be_empty }
    end

    context 'with multiple page_links' do
      let(:page) { dom("<a href='tgt1'>a</a><a href='tgt2'>a</a>") }

      it { expect(hrefs).to eq(%w[tgt1 tgt2]) }
    end
  end

  describe '#chapter_links' do
    # evaluated lazily, i.e. after every `before` hook has adjusted chapter_path
    let(:hrefs) { subject.chapter_links(page) { |anchor| anchor['href'] } }

    before { subject.chapter_path = 'a.chapter' }

    context 'with no chapter_links' do
      let(:page) { dom('<p></p>') }

      it { expect(hrefs).to be_empty }
    end

    context 'with only page_links' do
      let(:page) { dom("<p><a href='x'>x</a></p>") }

      it { expect(hrefs).to be_empty }
    end

    context 'with multiple chapter_links' do
      let(:page) { dom("<a class='chapter' href='tgt1'>a</a><a class='chapter' href='tgt2'>a</a>") }

      it { expect(hrefs).to eq(%w[tgt1 tgt2]) }
    end

    context 'with nil chapter_path' do
      let(:page) { dom("<a class='chapter' href='tgt1'>a</a><a class='chapter' href='tgt2'>a</a>") }

      # runs after the outer hook, so the selector really is nil here
      before { subject.chapter_path = nil }

      it { expect(hrefs).to be_empty }
    end
  end
end
79
+
@@ -0,0 +1,65 @@
1
+ require "spec_helper"
2
+
3
# Specs for AudioBookCreator::Runner. `system` is stubbed on the subject in
# every example, so no external command is ever executed. `enable_logging`
# and `expect_to_have_logged` are helpers that are not defined in this file.
describe AudioBookCreator::Runner do
  subject { described_class.new }

  let(:args) { %w[arg1 arg2] }

  context 'with successful command' do
    it 'runs commands with arguments' do
      expect(subject).to receive(:system).with('cmd', 'arg1', 'arg2').and_return(true)
      subject.run!('cmd', params: args)
    end

    it 'runs commands with non string arguments' do
      # the integer is expected to reach `system` as the string "1"
      expect(subject).to receive(:system).with('cmd', 'arg1', '1').and_return(true)
      subject.run!('cmd', params: ['arg1', 1])
    end

    it 'runs commands with hashes and nested arrays' do
      # hash key first, then each nested value, flattened into one argument list
      expect(subject).to receive(:system).with('cmd', 'arg1', 'a', 'b').and_return(true)
      subject.run!('cmd', params: { arg1: %w[a b] })
    end

    it 'runs commands with nils' do
      # a nil value contributes nothing; only its key is passed along
      expect(subject).to receive(:system).with('cmd', 'arg1').and_return(true)
      subject.run!('cmd', params: { arg1: nil })
    end

    context 'without verbose' do
      it 'doesnt log' do
        expect(subject).to receive(:system).and_return(true)
        subject.run!('cmd', params: args)
        expect_to_have_logged
      end
    end

    context 'with verbose' do
      before { enable_logging }

      it 'logs messages' do
        expect(subject).to receive(:system).and_return(true)
        expect(subject.run!('cmd', params: args)).to be_truthy
        expect_to_have_logged(/run: cmd arg1 arg2/, '', '', 'success')
      end
    end
  end

  context 'with failing command' do
    it 'returns false' do
      # the non-bang variant reports failure through its return value
      expect(subject).to receive(:system).and_return(false)
      expect(subject.run('cmd', params: args)).not_to be_truthy
    end

    it 'raises exception' do
      expect(subject).to receive(:system).and_return(false)
      expect { subject.run!('cmd', params: args) }.to raise_error(/trouble/)
    end

    context 'with verbose' do
      before { enable_logging }

      it 'logs messages' do
        expect(subject).to receive(:system).and_return(false)
        expect { subject.run!('cmd', params: args) }.to raise_error(/trouble/)
        expect_to_have_logged(/run.*cmd.*arg1 arg2/, '', '', 'issue')
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'spec_helper'
2
+
3
# Specs for AudioBookCreator::SpeakerDef: the attribute defaults, and the
# values read back after every attribute is overridden at construction.
describe AudioBookCreator::SpeakerDef do
  context 'with no parameters' do
    subject { described_class.new }

    defaults = {
      # for speaking the chapter
      voice: 'Vicki',
      rate: 280,
      # for binding the book
      channels: 1,
      bit_rate: 32,
      max_hours: 7,
      sample_rate: 22_050
    }

    # one example per attribute, in the order listed above
    defaults.each do |attribute, expected|
      it { expect(subject.public_send(attribute)).to eq(expected) }
    end

    it { expect(subject.regen_audio).to be_falsy }
  end

  context 'with parameters' do
    subject do
      described_class.new(
        voice: 'Serena',
        rate: 360,
        channels: 2,
        bit_rate: 64,
        max_hours: 2,
        sample_rate: 44_100,
        regen_audio: true
      )
    end

    overrides = {
      voice: 'Serena',
      rate: 360,
      # for binding the book
      channels: 2,
      bit_rate: 64,
      max_hours: 2,
      sample_rate: 44_100
    }

    # one example per attribute, in the order listed above
    overrides.each do |attribute, expected|
      it { expect(subject.public_send(attribute)).to eq(expected) }
    end

    it { expect(subject.regen_audio).to be_truthy }
  end
end
@@ -0,0 +1,105 @@
1
+ require "spec_helper"
2
+
3
# Specs for AudioBookCreator::Speaker, which turns a chapter into a text file
# plus an audio file by shelling out to `say` through AudioBookCreator::Runner.
# File access and Runner#system are stubbed throughout, so nothing touches the
# disk or spawns a process. `chapter`, `spoken_chapter`, `enable_logging` and
# `expect_to_have_logged` are helpers that are not defined in this file.
describe AudioBookCreator::Speaker do
  let(:book_def)    { AudioBookCreator::BookDef.new("dir") }
  let(:speaker_def) { AudioBookCreator::SpeakerDef.new(regen_audio: false) }

  subject { described_class.new(speaker_def, book_def) }

  it "should require a non empty chapter" do
    expect_runner.not_to receive(:system)
    # A bare `raise_error` matches any exception whatsoever (RSpec warns about
    # the false-positive risk), so at least pin it to StandardError.
    # NOTE(review): tighten to the exact message once confirmed - the empty
    # list example below expects "Empty chapter".
    expect { subject.say(chapter(nil)) }.to raise_error(StandardError)
  end

  it "should do nothing if txt and mp4 file exist" do
    expect(File).to receive(:exist?).with("dir/chapter01.txt").and_return(true)
    expect(File).to receive(:exist?).with("dir/chapter01.m4a").and_return(true)

    expect(File).not_to receive(:write)
    expect_runner.not_to receive(:system)
    expect(subject.say(chapter)).to eq(spoken_chapter("the title", "dir/chapter01.m4a"))
  end

  it "should create text and mp4 file" do
    expect(File).to receive(:exist?).twice.and_return(false)
    expect(File).to receive(:write).with("dir/chapter01.txt", "the title\n\ncontent\n")

    # voice and rate come from the SpeakerDef defaults; the rate is passed as a string
    expect_runner.to receive(:system)
      .with("say", "-v", "Vicki", "-r", "280", "-f", "dir/chapter01.txt", "-o", "dir/chapter01.m4a")
      .and_return(true)
    subject.say(chapter)
  end

  it "doesnt print if not verbose" do
    expect(File).to receive(:exist?).twice.and_return(false)
    expect(File).to receive(:write)

    expect_runner.to receive(:system).and_return(true)
    subject.say(chapter)
    expect_to_have_logged
  end

  it "should output messages if set to verbose" do
    enable_logging
    expect(File).to receive(:exist?).twice.and_return(false)
    expect(File).to receive(:write)

    expect_runner.to receive(:system).and_return(true)
    subject.say(chapter)
    expect_to_have_logged(/^run:/, "", "", "success")
  end

  context "with force" do
    # the inherited subject is built lazily, so it picks up this flag
    before { speaker_def.regen_audio = true }

    it "should create text and mp4 file if they exist but are set to force" do
      # with regeneration forced, existing files are not even checked for
      expect(File).not_to receive(:exist?)
      expect(File).to receive(:write)

      expect_runner.to receive(:system).and_return(true)
      subject.say(chapter)
    end
  end

  it "should freak if no chapters are passed in" do
    expect_runner.not_to receive(:system)
    expect { subject.say([]) }.to raise_error("Empty chapter")
  end

  context "#make_directory_structure" do
    it "should create base directory" do
      expect_runner.not_to receive(:system)
      expect(File).to receive(:exist?).with(subject.book_def.base_dir).and_return(false)
      expect(FileUtils).to receive(:mkdir).with(subject.book_def.base_dir)
      subject.make_directory_structure
    end

    it "should not create base directory if it exists" do
      expect_runner.not_to receive(:system)
      expect(File).to receive(:exist?).with(subject.book_def.base_dir).and_return(true)
      expect(FileUtils).not_to receive(:mkdir)
      subject.make_directory_structure
    end
  end

  context "#chapter_text_filename" do
    let(:chapter) { AudioBookCreator::Chapter.new(number: 3) }

    it do
      expect_runner.not_to receive(:system)
      # the chapter number is zero padded to two digits
      expect(subject.chapter_text_filename(chapter)).to eq("dir/chapter03.txt")
    end
  end

  context "#chapter_sound_filename" do
    let(:chapter) { AudioBookCreator::Chapter.new(number: 2) }

    it do
      expect_runner.not_to receive(:system)
      expect(subject.chapter_sound_filename(chapter)).to eq("dir/chapter02.m4a")
    end
  end

  private

  # Expectation target covering every AudioBookCreator::Runner instance; the
  # examples never hold a reference to the runner that the speaker uses.
  def expect_runner
    expect_any_instance_of(AudioBookCreator::Runner)
  end
end
@@ -0,0 +1,172 @@
1
+ require "spec_helper"
2
+
3
# Specs for AudioBookCreator::Spider, which starts from one or more urls and
# follows the links selected by the PageDef. `web` is a plain Hash standing in
# for the page source: every expected fetch is declared as a `web[url]`
# message expectation through #expect_visit_page, so an example fails when a
# declared page is never requested. `site`, `uri`, `link` and `page` are
# helpers that are not defined in this file.
describe AudioBookCreator::Spider do
  # set a max to prevent errors from causing infinite loops
  # NOTE: could use arrays here, but put caps to catch bugs
  # NOTE(review): no max/cap is configured anywhere in this spec - the two
  # comments above look stale; confirm and drop.
  let(:page_def) { AudioBookCreator::PageDef.new("h1", "p", "a.page", "a.chapter") }
  let(:web) { {} }
  let(:invalid_urls) { {} }

  subject { described_class.new(page_def, web, invalid_urls) }

  it "handles empty initializer" do
    pristine = described_class.new(page_def)
    expect(pristine.web).to be_a(Hash)
    expect(pristine.invalid_urls).to be_a(Hash)
  end

  it "sets arguments" do
    expect(subject.page_def).to eq(page_def)
    expect(subject.web).to eq(web)
    expect(subject.invalid_urls).not_to be_nil
  end

  context "#visit" do
    it "visit urls" do
      expect_visit_page "page1", "x"
      expect(subject.run(uri(%w(page1)))).to eq([page(site("page1"), "x")])
    end

    it "visit string" do
      expect_visit_page "page1", "x"
      expect(subject.run(site(%w(page1)))).to eq([page(site("page1"), "x")])
    end

    it "visit multiple pages" do
      expect_visit_page "page1"
      expect_visit_page "page2"
      expect(subject.run(uri(%w(page1 page2))))
        .to eq([page(site("page1")), page(site("page2"))])
    end

    it "visit unique list of pages" do
      # page2 is linked twice but must be fetched (and returned) only once
      expect_visit_page "page1", link("page2"), link("page2")
      expect_visit_page "page2"
      expect(subject.run(uri(%w(page1))))
        .to eq([page(site("page1"), link("page2"), link("page2")), page(site("page2"))])
    end

    it "skips loops from uri" do
      expect_visit_page "page1", link("page1")
      subject.run(uri(%w(page1)))
    end

    it "skips loops from string" do
      expect_visit_page "page1", link("page1")
      subject.run(site(%w(page1)))
    end

    it "also accepts string urls" do
      expect_visit_page "page1"
      subject.run(site(%w(page1)))
    end

    it "skips empty urls" do
      expect_visit_page "page1", "<a>x</a>"
      subject.run(site(%w(page1)))
    end

    it "skips blank urls" do
      expect_visit_page "page1", "<a href=\"\">x</a>"
      subject.run(site(%w(page1)))
    end
  end

  it "follows relative links" do
    expect_visit_page("page1", link("page2"))
    expect_visit_page("page2")
    subject.run(uri(%w(page1)))
  end

  it "follows absolute links" do
    expect_visit_page("page1", link(site("page2")))
    expect_visit_page("page2")
    subject.run(uri(%w(page1)))
  end

  # in the end of the day, these links reference the local page which is ignored, so no code necessary
  it "skips empty, blank, and local ref links" do
    p1_contents = ["<a id='a1'>a1</a>", "<a href=''>a2</a>", "<a href='#a'>x</a>", link("page2")]
    expect_visit_page("page1", *p1_contents)
    expect_visit_page("page2")
    expect(subject.run(uri(%w(page1)))).to eq([page(site("page1"), *p1_contents), page(site("page2"))])
  end

  it "visits all pages once (and only once)" do
    expect_visit_page("page1", link("page2"))
    expect_visit_page("page2", link("page1"), link("page3"))
    expect_visit_page("page3", link("page1"), link("page2"))
    subject.run(uri(%w(page1)))
  end

  it "visits all chapters once (and only once)" do
    expect_visit_page("page1", link("page2", "chapter"))
    expect_visit_page("page2", link("page1", "chapter"), link("page3", "chapter"))
    expect_visit_page("page3", link("page1", "chapter"), link("page2", "chapter"))
    subject.run(uri(%w(page1)))
  end

  it "visits chapters too" do
    expect_visit_page("page1", link("page2", "chapter"))
    expect_visit_page("page2")
    subject.run(uri(%w(page1)))
  end

  it "leverages page_def to determine good links" do
    page_def.link_path = ".good a"
    # only the link inside .good may be followed; no fetch is declared for "bad"
    expect_visit_page("page1", "<div class='good'>", link("good"), "</div>", link("bad"))
    expect_visit_page("good")
    subject.run(uri(%w(page1)))
  end

  it "ignores #target in url" do
    expect_visit_page("page1", link("page1#target"))
    subject.run(uri(%w(page1)))
  end

  # Renamed from "skips bad urls": the assertion is that the run raises, not
  # that the malformed link is skipped.
  it "raises on bad urls" do
    expect_visit_page("page1", link("%@")) # it never gets to call a second time
    expect { subject.run(uri(%w(page1))) }.to raise_error(/bad URI/)
  end

  it "uses url filter for pages" do
    # the filter raising proves it was consulted with the parsed link
    expect(invalid_urls).to receive(:include?).with(URI.parse(site("x.pdf"))).and_raise("bad file extension")
    expect_visit_page("page1", link("x.pdf"))
    expect { subject.run(uri(%w(page1))) }.to raise_error("bad file extension")
  end

  it "uses url filter for chapters" do
    expect(invalid_urls).to receive(:include?).with(URI.parse(site("x.pdf"))).and_raise("bad file extension")
    expect_visit_page("page1", link("x.pdf", "chapter"))
    expect { subject.run(uri(%w(page1))) }.to raise_error("bad file extension")
  end

  context "with invalid_urls" do
    it "skips invalid_urls" do
      # no fetch is declared for "bad", so requesting it would be an unexpected call
      expect(subject.invalid_urls).to receive(:include?).with(uri("bad")).and_return(true)
      expect_visit_page("page1", link("bad"))
      subject.run(uri(%w(page1)))
    end
  end

  context "logging" do
    it "logs page visits" do
      enable_logging
      expect_visit_page("page1")
      subject.run(uri(%w(page1)))
      expect_to_have_logged("visit #{uri("page1")}")
    end

    it "doesnt log page visits" do
      expect_visit_page("page1")
      subject.run(uri(%w(page1)))
      expect_to_have_logged
    end
  end

  private

  # Declares that the spider must request `url` (expanded through `site`) from
  # `web`, and stubs that lookup to return a page built from `args`.
  def expect_visit_page(url, *args)
    full_url = site(url)
    expect(web).to receive(:[]).with(full_url.to_s).and_return(page(full_url, *args))
  end
end