audio_book_creator 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/.rspec +4 -0
- data/.travis.yml +8 -0
- data/Gemfile +8 -0
- data/README.md +60 -0
- data/Rakefile +8 -0
- data/audio_book_creator.gemspec +31 -0
- data/bin/audio_book_creator +6 -0
- data/lib/audio_book_creator.rb +59 -0
- data/lib/audio_book_creator/binder.rb +61 -0
- data/lib/audio_book_creator/book_creator.rb +31 -0
- data/lib/audio_book_creator/book_def.rb +36 -0
- data/lib/audio_book_creator/cached_hash.rb +20 -0
- data/lib/audio_book_creator/cascading_array.rb +57 -0
- data/lib/audio_book_creator/chapter.rb +33 -0
- data/lib/audio_book_creator/cli.rb +119 -0
- data/lib/audio_book_creator/conductor.rb +67 -0
- data/lib/audio_book_creator/editor.rb +20 -0
- data/lib/audio_book_creator/logging.rb +7 -0
- data/lib/audio_book_creator/page_db.rb +42 -0
- data/lib/audio_book_creator/page_def.rb +31 -0
- data/lib/audio_book_creator/runner.rb +22 -0
- data/lib/audio_book_creator/speaker.rb +54 -0
- data/lib/audio_book_creator/speaker_def.rb +39 -0
- data/lib/audio_book_creator/spider.rb +60 -0
- data/lib/audio_book_creator/spoken_chapter.rb +16 -0
- data/lib/audio_book_creator/surfer_def.rb +15 -0
- data/lib/audio_book_creator/url_filter.rb +33 -0
- data/lib/audio_book_creator/version.rb +3 -0
- data/lib/audio_book_creator/web.rb +44 -0
- data/spec/audio_book_creator/binder_spec.rb +103 -0
- data/spec/audio_book_creator/book_creator_spec.rb +63 -0
- data/spec/audio_book_creator/book_def_spec.rb +61 -0
- data/spec/audio_book_creator/cached_hash_spec.rb +19 -0
- data/spec/audio_book_creator/cascading_array_spec.rb +64 -0
- data/spec/audio_book_creator/chapter_spec.rb +80 -0
- data/spec/audio_book_creator/cli_spec.rb +274 -0
- data/spec/audio_book_creator/conductor_spec.rb +102 -0
- data/spec/audio_book_creator/editor_spec.rb +39 -0
- data/spec/audio_book_creator/logging_spec.rb +21 -0
- data/spec/audio_book_creator/page_db_spec.rb +74 -0
- data/spec/audio_book_creator/page_def_spec.rb +79 -0
- data/spec/audio_book_creator/runner_spec.rb +65 -0
- data/spec/audio_book_creator/speaker_def_spec.rb +39 -0
- data/spec/audio_book_creator/speaker_spec.rb +105 -0
- data/spec/audio_book_creator/spider_spec.rb +172 -0
- data/spec/audio_book_creator/spoken_chapter_spec.rb +30 -0
- data/spec/audio_book_creator/surfer_def_spec.rb +17 -0
- data/spec/audio_book_creator/url_filter_spec.rb +52 -0
- data/spec/audio_book_creator/version_spec.rb +5 -0
- data/spec/audio_book_creator/web_spec.rb +66 -0
- data/spec/audio_book_creator_spec.rb +25 -0
- data/spec/spec_helper.rb +106 -0
- data/spec/support/test_logger.rb +21 -0
- metadata +238 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::Editor#parse.
#
# Pages are built with the `page` helper and expected chapters with the
# `chapter` helper (both presumably provided by spec_helper — not shown here).
describe AudioBookCreator::Editor do
  let(:page_def) { AudioBookCreator::PageDef.new("h1", "p") }
  let(:chapter1) { chapter("first\n\nsecond", "the title") }

  subject { described_class.new(page_def) }

  it "should generate a page" do
    chapters = subject.parse([
      page("page1", "<h1>the title</h1>", "<p>first</p>", "<p>second</p>"),
    ])

    expect(chapters).to eq([chapter1])
  end

  it "should respect content path" do
    # The selectors are changed before `subject` is first referenced.
    page_def.title_path = "h3"
    page_def.body_path = "#story p"

    chapters = subject.parse([
      page("page1",
           "<h3>the title</h3>",
           "<div id='story'>", "<p>first</p>", "<p>second</p>", "</div>",
           "<p>bad</p>"),
    ])

    expect(chapters).to eq([chapter1])
  end

  it "should ignore body formatting" do
    chapters = subject.parse([
      page("page1",
           "<h1>the title</h1>",
           "<p><a href='#this'>first</a></p>", "<p><b>second</b></p>"),
    ])

    expect(chapters).to eq([chapter1])
  end

  it "should parse multiple pages" do
    chapters = subject.parse([
      page("page1", "<h1>p1</h1>", "<p>first</p>"),
      page("page2", "<h1>p2</h1>", "<p>second</p>"),
    ])

    expect(chapters).to eq([chapter("first", "p1", 1), chapter("second", "p2", 2)])
  end

  it "should default the title if none found" do
    chapters = subject.parse([
      page("page1", "<p>first</p>"),
      page("page2", "<p>second</p>"),
    ])

    expect(chapters).to eq([chapter("first", "Chapter 1", 1), chapter("second", "Chapter 2", 2)])
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for the AudioBookCreator::Logging mixin.
#
# The subject is an instance of a throwaway class that includes the module.
# `enable_logging` and `expect_to_have_logged` are spec helpers defined
# outside this file (presumably spec_helper / support/test_logger).
describe AudioBookCreator::Logging do
  subject do
    host_class = Class.new
    host_class.send(:include, described_class)
    host_class.new
  end

  it "should not log strings when verbose is off" do
    subject.logger.info("phrase")

    expect_to_have_logged
  end

  it "should log strings" do
    enable_logging

    subject.logger.info("phrase")

    expect_to_have_logged("phrase")
  end

  it "should log blocks" do
    enable_logging

    subject.logger.info { "phrase" }

    expect_to_have_logged("phrase")
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
# Specs for AudioBookCreator::PageDb, exercised as a key/value store through
# `[]`, `[]=`, `include?` and `map`.
#
# Unless a context says otherwise, the subject is backed by ":memory:".
describe AudioBookCreator::PageDb do
  subject { described_class.new(":memory:") }

  # Every example outside the file-database context runs against ":memory:",
  # but this is the only one that asserts nothing is written to disk.
  context "with memory databases" do
    it "does not create a file" do
      # access key to trigger database creation
      subject["key"]
      expect(File).not_to be_exist(":memory:")
    end
  end

  it "works" do
    expect(subject).not_to be_nil
  end

  it "creates cache value" do
    subject["key"] = "value"

    expect(subject["key"]).to eq("value")
  end

  it "include good key" do
    subject["key"] = "value"
    expect(subject).to include("key")
  end

  it "doesnt include bad key" do
    expect(subject).not_to include("key")
  end

  context "with prepopulated (file) database" do
    let(:tmp) { Tempfile.new("db") }

    # Write through one PageDb instance so the examples can read the value
    # back through a second instance opened on the same path.
    before do
      db = described_class.new(tmp.path)
      db["key"] = "value"
    end

    after do
      tmp.close
      tmp.unlink
    end

    it "finds entry in previously created cache" do
      db = described_class.new(tmp.path)
      expect(db["key"]).to eq("value")
    end

    # NOTE(review): Tempfile.new already creates the file on disk, so this
    # only shows the file still exists after PageDb has used the path.
    it "creates a file" do
      expect(File.exist?(tmp.path)).to be_truthy
    end
  end

  it "handles url keys" do
    # The key deliberately contains "?", "&", "=" and "#" to cover URL-shaped keys.
    key = "http://the.web.site.com/path/to/cgi?param1=x&param2=y#substuff"
    contents = "a" * 555
    subject[key] = contents
    expect(subject[key]).to eq(contents)
  end

  it "supports enumerable (map)" do
    subject["keyc"] = "v"
    subject["keya"] = "v"
    subject["keyz"] = "v"

    # Expected order is insertion order, not sorted order.
    expect(subject.map { |(n, v)| "#{n}:#{v}" }).to eq(%w(keyc:v keya:v keyz:v))
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::PageDef: constructor defaults and the
# title / body / page_links / chapter_links extractors.
#
# `dom` is a spec helper defined outside this file; it presumably turns an
# HTML snippet into a parsed document — confirm in spec_helper.
describe AudioBookCreator::PageDef do
  context "with no parameter" do
    subject { described_class.new }

    it { expect(subject.title_path).to eq("h1") }
    it { expect(subject.body_path).to eq("p") }
    it { expect(subject.link_path).to eq("a") }
    it { expect(subject.chapter_path).to be_nil }
  end

  context "with all parameters" do
    subject { described_class.new("h1.title", "div", "a.link", "a.chapter") }

    it { expect(subject.title_path).to eq("h1.title") }
    it { expect(subject.body_path).to eq("div") }
    it { expect(subject.link_path).to eq("a.link") }
    it { expect(subject.chapter_path).to eq("a.chapter") }
  end

  # The remaining groups rely on RSpec's implicit subject (described_class.new).

  describe "#title" do
    context "with no title" do
      let(:page) { dom("<p></p>") }

      it { expect(subject.title(page)).to be_nil }
    end

    context "with title" do
      let(:page) { dom("<h1>title</h1>") }

      it { expect(subject.title(page)).to eq("title") }
    end
  end

  # NOTE: chapter uses array.join
  describe "#body" do
    context "with no body" do
      let(:page) { dom("<h1></h1>") }

      it { expect(subject.body(page)).to be_empty }
    end

    context "with body" do
      let(:page) { dom("<p>p1</p>") }

      it { expect(Array(subject.body(page)).join).to eq("p1") }
    end

    context "with many bodies" do
      let(:page) { dom("<p>p1</p><p>p2</p><p>p3</p><p>p4</p>") }

      it { expect(Array(subject.body(page)).join).to eq(%w[p1 p2 p3 p4].join) }
    end
  end

  describe "#page_links" do
    context "with no page_links" do
      let(:page) { dom("<p></p>") }

      it { expect(subject.page_links(page) { |node| node["href"] }).to be_empty }
    end

    context "with multiple page_links" do
      let(:page) { dom("<a href='tgt1'>a</a><a href='tgt2'>a</a>") }

      it { expect(subject.page_links(page) { |node| node["href"] }).to eq(%w[tgt1 tgt2]) }
    end
  end

  describe "#chapter_links" do
    before { subject.chapter_path = "a.chapter" }

    context "with no chapter_links" do
      let(:page) { dom("<p></p>") }

      it { expect(subject.chapter_links(page) { |node| node["href"] }).to be_empty }
    end

    context "with only page_links" do
      let(:page) { dom("<p><a href='x'>x</a></p>") }

      it { expect(subject.chapter_links(page) { |node| node["href"] }).to be_empty }
    end

    context "with multiple chapter_links" do
      let(:page) { dom("<a class='chapter' href='tgt1'>a</a><a class='chapter' href='tgt2'>a</a>") }

      it { expect(subject.chapter_links(page) { |node| node["href"] }).to eq(%w[tgt1 tgt2]) }
    end

    context "with nil chapter_path" do
      # Runs after the outer `before`, so the path ends up nil.
      before { subject.chapter_path = nil }

      let(:page) { dom("<a class='chapter' href='tgt1'>a</a><a class='chapter' href='tgt2'>a</a>") }

      it { expect(subject.chapter_links(page) { |node| node["href"] }).to be_empty }
    end
  end
end
|
79
|
+
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::Runner#run and #run!.
#
# `system` is stubbed on the subject in every example, so no external
# command is executed. `enable_logging` / `expect_to_have_logged` are spec
# helpers defined outside this file.
describe AudioBookCreator::Runner do
  subject { described_class.new }

  let(:two_args) { %w[arg1 arg2] }

  context "with successful command" do
    it "runs commands with arguments" do
      expect(subject).to receive(:system).with("cmd", "arg1", "arg2").and_return(true)

      subject.run!("cmd", params: two_args)
    end

    it "runs commands with non string arguments" do
      expect(subject).to receive(:system).with("cmd", "arg1", "1").and_return(true)

      subject.run!("cmd", params: ["arg1", 1])
    end

    it "runs commands with hashes and nested arrays" do
      expect(subject).to receive(:system).with("cmd", "arg1", "a", "b").and_return(true)

      subject.run!("cmd", params: { arg1: %w[a b] })
    end

    it "runs commands with nils" do
      expect(subject).to receive(:system).with("cmd", "arg1").and_return(true)

      subject.run!("cmd", params: { arg1: nil })
    end

    context "without verbose" do
      it "doesnt log" do
        expect(subject).to receive(:system).and_return(true)

        subject.run!("cmd", params: two_args)

        expect_to_have_logged
      end
    end

    context "with verbose" do
      before { enable_logging }

      it "logs messages" do
        expect(subject).to receive(:system).and_return(true)

        outcome = subject.run!("cmd", params: two_args)

        expect(outcome).to be_truthy
        expect_to_have_logged(/run: cmd arg1 arg2/, "", "", "success")
      end
    end
  end

  context "with failing command" do
    it "returns false" do
      expect(subject).to receive(:system).and_return(false)

      outcome = subject.run("cmd", params: two_args)

      expect(outcome).not_to be_truthy
    end

    it "raises exception" do
      expect(subject).to receive(:system).and_return(false)

      expect { subject.run!("cmd", params: two_args) }.to raise_error(/trouble/)
    end

    context "with verbose" do
      before { enable_logging }

      it "logs messages" do
        expect(subject).to receive(:system).and_return(false)

        expect { subject.run!("cmd", params: two_args) }.to raise_error(/trouble/)
        expect_to_have_logged(/run.*cmd.*arg1 arg2/, "", "", "issue")
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::SpeakerDef: default values and the values
# taken from constructor options.
describe AudioBookCreator::SpeakerDef do
  context "with no parameters" do
    subject(:defaults) { described_class.new }

    # for speaking the chapter
    it { expect(defaults.voice).to eq("Vicki") }
    it { expect(defaults.rate).to eq(280) }

    # for binding the book
    it { expect(defaults.channels).to eq(1) }
    it { expect(defaults.bit_rate).to eq(32) }
    it { expect(defaults.max_hours).to eq(7) }
    it { expect(defaults.sample_rate).to eq(22_050) }
    it { expect(defaults.regen_audio).to be_falsy }
  end

  context "with parameters" do
    subject(:customized) do
      described_class.new(
        voice: "Serena",
        rate: 360,
        channels: 2,
        bit_rate: 64,
        max_hours: 2,
        sample_rate: 44_100,
        regen_audio: true
      )
    end

    # for speaking the chapter
    it { expect(customized.voice).to eq("Serena") }
    it { expect(customized.rate).to eq(360) }

    # for binding the book
    it { expect(customized.channels).to eq(2) }
    it { expect(customized.bit_rate).to eq(64) }
    it { expect(customized.max_hours).to eq(2) }
    it { expect(customized.sample_rate).to eq(44_100) }
    it { expect(customized.regen_audio).to be_truthy }
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::Speaker.
#
# File system access (File.exist?, File.write, FileUtils.mkdir) and the
# Runner's `system` call are mocked, so nothing is written and no external
# command runs. `chapter`, `spoken_chapter`, `enable_logging` and
# `expect_to_have_logged` are spec helpers defined outside this file.
describe AudioBookCreator::Speaker do
  let(:book_def) { AudioBookCreator::BookDef.new("dir") }
  let(:speaker_def) { AudioBookCreator::SpeakerDef.new(regen_audio: false) }

  subject { described_class.new(speaker_def, book_def) }

  it "should require a non empty chapter" do
    expect_runner.not_to receive(:system)

    expect { subject.say(chapter(nil)) }.to raise_error
  end

  it "should do nothing if txt and mp4 file exist" do
    expect(File).to receive(:exist?).with("dir/chapter01.txt").and_return(true)
    expect(File).to receive(:exist?).with("dir/chapter01.m4a").and_return(true)

    expect(File).not_to receive(:write)
    expect_runner.not_to receive(:system)

    spoken = subject.say(chapter)

    expect(spoken).to eq(spoken_chapter("the title", "dir/chapter01.m4a"))
  end

  it "should create text and mp4 file" do
    say_command = [
      "say", "-v", "Vicki", "-r", "280",
      "-f", "dir/chapter01.txt", "-o", "dir/chapter01.m4a"
    ]

    expect(File).to receive(:exist?).twice.and_return(false)
    expect(File).to receive(:write).with("dir/chapter01.txt", "the title\n\ncontent\n")
    expect_runner.to receive(:system).with(*say_command).and_return(true)

    subject.say(chapter)
  end

  it "doesnt print if not verbose" do
    expect(File).to receive(:exist?).twice.and_return(false)
    expect(File).to receive(:write)
    expect_runner.to receive(:system).and_return(true)

    subject.say(chapter)

    expect_to_have_logged
  end

  it "should output messages if set to verbose" do
    enable_logging

    expect(File).to receive(:exist?).twice.and_return(false)
    expect(File).to receive(:write)
    expect_runner.to receive(:system).and_return(true)

    subject.say(chapter)

    expect_to_have_logged(/^run:/, "", "", "success")
  end

  context "with force" do
    before { speaker_def.regen_audio = true }

    subject { described_class.new(speaker_def, book_def) }

    it "should create text and mp4 file if they exist but are set to force" do
      # With regen_audio set, the existence checks must be skipped entirely.
      expect(File).not_to receive(:exist?)
      expect(File).to receive(:write)
      expect_runner.to receive(:system).and_return(true)

      subject.say(chapter)
    end
  end

  it "should freak if no chapters are passed in" do
    expect_runner.not_to receive(:system)

    expect { subject.say([]) }.to raise_error("Empty chapter")
  end

  context "#make_directory_structure" do
    it "should create base directory" do
      base_dir = subject.book_def.base_dir

      expect_runner.not_to receive(:system)
      expect(File).to receive(:exist?).with(base_dir).and_return(false)
      expect(FileUtils).to receive(:mkdir).with(base_dir)

      subject.make_directory_structure
    end

    it "should not create base directory if it exists" do
      base_dir = subject.book_def.base_dir

      expect_runner.not_to receive(:system)
      expect(File).to receive(:exist?).with(base_dir).and_return(true)
      expect(FileUtils).not_to receive(:mkdir)

      subject.make_directory_structure
    end
  end

  context "#chapter_text_filename" do
    # Shadows the `chapter` helper within this context.
    let(:chapter) { AudioBookCreator::Chapter.new(number: 3) }

    it do
      expect_runner.not_to receive(:system)

      expect(subject.chapter_text_filename(chapter)).to eq("dir/chapter03.txt")
    end
  end

  context "#chapter_sound_filename" do
    # Shadows the `chapter` helper within this context.
    let(:chapter) { AudioBookCreator::Chapter.new(number: 2) }

    it do
      expect_runner.not_to receive(:system)

      expect(subject.chapter_sound_filename(chapter)).to eq("dir/chapter02.m4a")
    end
  end

  private

  # Expectation target covering any Runner instance, since these examples
  # have no handle on the Runner that actually receives `system`.
  def expect_runner
    expect_any_instance_of(AudioBookCreator::Runner)
  end
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::Spider#run.
#
# `web` is a plain Hash standing in for the page source; each expected fetch
# is declared with `expect_visit_page`, which mocks `web[url]`. Because the
# default `receive` expectation allows exactly one call, the "once (and only
# once)" examples fail if a page is fetched twice.
#
# `uri`, `site`, `page`, `link`, `enable_logging` and `expect_to_have_logged`
# are spec helpers defined outside this file.
#
# NOTE(review): the original comments here mentioned setting a max / caps to
# guard against infinite loops, but nothing in this spec configures one —
# presumably left over from an earlier version.
describe AudioBookCreator::Spider do
  let(:page_def) { AudioBookCreator::PageDef.new("h1", "p", "a.page", "a.chapter") }
  let(:web) { {} }
  let(:invalid_urls) { {} }

  subject { described_class.new(page_def, web, invalid_urls) }

  it "handles empty initializer" do
    pristine = described_class.new(page_def)

    expect(pristine.web).to be_a(Hash)
    expect(pristine.invalid_urls).to be_a(Hash)
  end

  it "sets arguments" do
    expect(subject.page_def).to eq(page_def)
    expect(subject.web).to eq(web)
    expect(subject.invalid_urls).not_to be_nil
  end

  context "#visit" do
    it "visit urls" do
      expect_visit_page("page1", "x")

      visited = subject.run(uri(%w[page1]))

      expect(visited).to eq([page(site("page1"), "x")])
    end

    it "visit string" do
      expect_visit_page("page1", "x")

      visited = subject.run(site(%w[page1]))

      expect(visited).to eq([page(site("page1"), "x")])
    end

    it "visit multiple pages" do
      expect_visit_page("page1")
      expect_visit_page("page2")

      visited = subject.run(uri(%w[page1 page2]))

      expect(visited).to eq([page(site("page1")), page(site("page2"))])
    end

    it "visit unique list of pages" do
      expect_visit_page("page1", link("page2"), link("page2"))
      expect_visit_page("page2")

      visited = subject.run(uri(%w[page1]))

      expect(visited).to eq([page(site("page1"), link("page2"), link("page2")), page(site("page2"))])
    end

    it "skips loops from uri" do
      expect_visit_page("page1", link("page1"))

      subject.run(uri(%w[page1]))
    end

    it "skips loops from string" do
      expect_visit_page("page1", link("page1"))

      subject.run(site(%w[page1]))
    end

    it "also accepts string urls" do
      expect_visit_page("page1")

      subject.run(site(%w[page1]))
    end

    it "skips empty urls" do
      expect_visit_page("page1", "<a>x</a>")

      subject.run(site(%w[page1]))
    end

    it "skips blank urls" do
      expect_visit_page("page1", "<a href=\"\">x</a>")

      subject.run(site(%w[page1]))
    end
  end

  it "follows relative links" do
    expect_visit_page("page1", link("page2"))
    expect_visit_page("page2")

    subject.run(uri(%w[page1]))
  end

  it "follows absolute links" do
    expect_visit_page("page1", link(site("page2")))
    expect_visit_page("page2")

    subject.run(uri(%w[page1]))
  end

  # in the end of the day, these links reference the local page which is ignored, so no code necessary
  it "skips empty, blank, and local ref links" do
    p1_contents = ["<a id='a1'>a1</a>", "<a href=''>a2</a>", "<a href='#a'>x</a>", link("page2")]
    expect_visit_page("page1", *p1_contents)
    expect_visit_page("page2")

    visited = subject.run(uri(%w[page1]))

    expect(visited).to eq([page(site("page1"), *p1_contents), page(site("page2"))])
  end

  it "visits all pages once (and only once)" do
    expect_visit_page("page1", link("page2"))
    expect_visit_page("page2", link("page1"), link("page3"))
    expect_visit_page("page3", link("page1"), link("page2"))

    subject.run(uri(%w[page1]))
  end

  it "visits all chapters once (and only once)" do
    expect_visit_page("page1", link("page2", "chapter"))
    expect_visit_page("page2", link("page1", "chapter"), link("page3", "chapter"))
    expect_visit_page("page3", link("page1", "chapter"), link("page2", "chapter"))

    subject.run(uri(%w[page1]))
  end

  it "visits chapters too" do
    expect_visit_page("page1", link("page2", "chapter"))
    expect_visit_page("page2")

    subject.run(uri(%w[page1]))
  end

  it "leverages page_def to determine good links" do
    page_def.link_path = ".good a"
    expect_visit_page("page1", "<div class='good'>", link("good"), "</div>", link("bad"))
    expect_visit_page("good")

    subject.run(uri(%w[page1]))
  end

  it "ignores #target in url" do
    expect_visit_page("page1", link("page1#target"))

    subject.run(uri(%w[page1]))
  end

  # NOTE(review): despite the example name, the expectation is that the bad
  # link raises rather than being silently skipped.
  it "skips bad urls" do
    expect_visit_page("page1", link("%@")) # it never gets to call a second time

    expect { subject.run(uri(%w[page1])) }.to raise_error(/bad URI/)
  end

  it "uses url filter for pages" do
    expect(invalid_urls).to receive(:include?).with(URI.parse(site("x.pdf"))).and_raise("bad file extension")
    expect_visit_page("page1", link("x.pdf"))

    expect { subject.run(uri(%w[page1])) }.to raise_error("bad file extension")
  end

  it "uses url filter for chapters" do
    expect(invalid_urls).to receive(:include?).with(URI.parse(site("x.pdf"))).and_raise("bad file extension")
    expect_visit_page("page1", link("x.pdf", "chapter"))

    expect { subject.run(uri(%w[page1])) }.to raise_error("bad file extension")
  end

  context "with invalid_urls" do
    it "skips invalid_urls" do
      expect(subject.invalid_urls).to receive(:include?).with(uri("bad")).and_return(true)
      expect_visit_page("page1", link("bad"))

      subject.run(uri(%w[page1]))
    end
  end

  context "logging" do
    it "logs page visits" do
      enable_logging
      expect_visit_page("page1")

      subject.run(uri(%w[page1]))

      expect_to_have_logged("visit #{uri("page1")}")
    end

    it "doesnt log page visits" do
      expect_visit_page("page1")

      subject.run(uri(%w[page1]))

      expect_to_have_logged
    end
  end

  private

  # Declares that the spider must fetch +url+ (expanded through the `site`
  # helper) from `web`, and makes that fetch return a page built from +args+.
  def expect_visit_page(url, *args)
    full_url = site(url)
    fetched_page = page(full_url, *args)
    expect(web).to receive(:[]).with(full_url.to_s).and_return(fetched_page)
  end
end
|