audio_book_creator 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/.rspec +4 -0
- data/.travis.yml +8 -0
- data/Gemfile +8 -0
- data/README.md +60 -0
- data/Rakefile +8 -0
- data/audio_book_creator.gemspec +31 -0
- data/bin/audio_book_creator +6 -0
- data/lib/audio_book_creator.rb +59 -0
- data/lib/audio_book_creator/binder.rb +61 -0
- data/lib/audio_book_creator/book_creator.rb +31 -0
- data/lib/audio_book_creator/book_def.rb +36 -0
- data/lib/audio_book_creator/cached_hash.rb +20 -0
- data/lib/audio_book_creator/cascading_array.rb +57 -0
- data/lib/audio_book_creator/chapter.rb +33 -0
- data/lib/audio_book_creator/cli.rb +119 -0
- data/lib/audio_book_creator/conductor.rb +67 -0
- data/lib/audio_book_creator/editor.rb +20 -0
- data/lib/audio_book_creator/logging.rb +7 -0
- data/lib/audio_book_creator/page_db.rb +42 -0
- data/lib/audio_book_creator/page_def.rb +31 -0
- data/lib/audio_book_creator/runner.rb +22 -0
- data/lib/audio_book_creator/speaker.rb +54 -0
- data/lib/audio_book_creator/speaker_def.rb +39 -0
- data/lib/audio_book_creator/spider.rb +60 -0
- data/lib/audio_book_creator/spoken_chapter.rb +16 -0
- data/lib/audio_book_creator/surfer_def.rb +15 -0
- data/lib/audio_book_creator/url_filter.rb +33 -0
- data/lib/audio_book_creator/version.rb +3 -0
- data/lib/audio_book_creator/web.rb +44 -0
- data/spec/audio_book_creator/binder_spec.rb +103 -0
- data/spec/audio_book_creator/book_creator_spec.rb +63 -0
- data/spec/audio_book_creator/book_def_spec.rb +61 -0
- data/spec/audio_book_creator/cached_hash_spec.rb +19 -0
- data/spec/audio_book_creator/cascading_array_spec.rb +64 -0
- data/spec/audio_book_creator/chapter_spec.rb +80 -0
- data/spec/audio_book_creator/cli_spec.rb +274 -0
- data/spec/audio_book_creator/conductor_spec.rb +102 -0
- data/spec/audio_book_creator/editor_spec.rb +39 -0
- data/spec/audio_book_creator/logging_spec.rb +21 -0
- data/spec/audio_book_creator/page_db_spec.rb +74 -0
- data/spec/audio_book_creator/page_def_spec.rb +79 -0
- data/spec/audio_book_creator/runner_spec.rb +65 -0
- data/spec/audio_book_creator/speaker_def_spec.rb +39 -0
- data/spec/audio_book_creator/speaker_spec.rb +105 -0
- data/spec/audio_book_creator/spider_spec.rb +172 -0
- data/spec/audio_book_creator/spoken_chapter_spec.rb +30 -0
- data/spec/audio_book_creator/surfer_def_spec.rb +17 -0
- data/spec/audio_book_creator/url_filter_spec.rb +52 -0
- data/spec/audio_book_creator/version_spec.rb +5 -0
- data/spec/audio_book_creator/web_spec.rb +66 -0
- data/spec/audio_book_creator_spec.rb +25 -0
- data/spec/spec_helper.rb +106 -0
- data/spec/support/test_logger.rb +21 -0
- metadata +238 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::Editor#parse: pages go in, chapters come out,
# with title and body located through the selectors held by a PageDef.
#
# NOTE(review): `page` and `chapter` are helpers defined outside this file
# (presumably spec_helper) -- confirm their argument order there.
describe AudioBookCreator::Editor do
  # Default selectors: <h1> for the title, <p> for body paragraphs.
  let(:page_def) { AudioBookCreator::PageDef.new("h1", "p") }
  subject { described_class.new(page_def) }
  # The chapter most examples expect: two paragraphs under "the title".
  let(:chapter1) { chapter("first\n\nsecond", "the title") }

  it "should generate a page" do
    expect(
      subject.parse(
        [
          page("page1", "<h1>the title</h1>", "<p>first</p>", "<p>second</p>")
        ]
      )
    ).to eq([chapter1])
  end

  it "should respect content path" do
    # Re-point the selectors; the trailing <p>bad</p> lies outside #story
    # and must not show up in the chapter body.
    page_def.title_path = "h3"
    page_def.body_path = "#story p"
    expect(
      subject.parse(
        [
          page(
            "page1",
            "<h3>the title</h3>",
            "<div id='story'>", "<p>first</p>", "<p>second</p>", "</div>",
            "<p>bad</p>"
          )
        ]
      )
    ).to eq([chapter1])
  end

  it "should ignore body formatting" do
    # Inline markup (<a>, <b>) inside paragraphs is expected to be dropped.
    expect(
      subject.parse(
        [
          page(
            "page1",
            "<h1>the title</h1>",
            "<p><a href='#this'>first</a></p>", "<p><b>second</b></p>"
          )
        ]
      )
    ).to eq([chapter1])
  end

  it "should parse multiple pages" do
    expect(
      subject.parse(
        [
          page("page1", "<h1>p1</h1>", "<p>first</p>"),
          page("page2", "<h1>p2</h1>", "<p>second</p>"),
        ]
      )
    ).to eq([chapter("first", "p1", 1), chapter("second", "p2", 2)])
  end

  it "should default the title if none found" do
    # Pages without a title element fall back to "Chapter <n>".
    expect(
      subject.parse(
        [
          page("page1", "<p>first</p>"),
          page("page2", "<p>second</p>"),
        ]
      )
    ).to eq([chapter("first", "Chapter 1", 1), chapter("second", "Chapter 2", 2)])
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for the AudioBookCreator::Logging mixin: an including object gets a
# `logger`, and output only appears once logging has been enabled.
#
# NOTE(review): `enable_logging` and `expect_to_have_logged` are helpers
# defined outside this file (presumably spec_helper / test_logger).
describe AudioBookCreator::Logging do
  # An instance of a throwaway class that mixes in the module under test.
  subject do
    host_class = Class.new
    host_class.send(:include, described_class)
    host_class.new
  end

  it "should not log strings when verbose is off" do
    subject.logger.info("phrase")
    # No arguments: nothing is expected to have been logged.
    expect_to_have_logged
  end

  it "should log strings" do
    enable_logging
    subject.logger.info("phrase")
    expect_to_have_logged("phrase")
  end

  it "should log blocks" do
    enable_logging
    # Block form of Logger#info: the message is produced by the block.
    subject.logger.info { "phrase" }
    expect_to_have_logged("phrase")
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
# Specs for AudioBookCreator::PageDb, a key/value page cache exercised here
# through `[]`, `[]=`, `include?` and `map`.
# Most examples run against an in-memory database (":memory:"); one context
# uses a temp file to check that values survive across instances.
describe AudioBookCreator::PageDb do
  subject { described_class.new(":memory:") }

  # all of these tests are in memory
  # this is the only test that depends upon it
  context "with memory databases" do
    it "does not create a file" do
      # access key to trigger database creation
      subject["key"]
      expect(File).not_to be_exist(":memory:")
    end
  end

  it "works" do
    expect(subject).not_to be_nil
  end

  it "creates cache value" do
    subject["key"] = "value"

    expect(subject["key"]).to eq("value")
  end

  it "include good key" do
    subject["key"] = "value"
    expect(subject).to include("key")
  end

  it "doesnt include bad key" do
    expect(subject).not_to include("key")
  end

  context "with prepopulated (file) database" do
    let(:tmp) { Tempfile.new("db") }

    # Write through one instance so the examples can read through another.
    before do
      db = described_class.new(tmp.path)
      db["key"] = "value"
    end

    after do
      tmp.close
      tmp.unlink
    end

    it "finds entry in previously created cache" do
      db = described_class.new(tmp.path)
      expect(db["key"]).to eq("value")
    end

    # NOTE(review): Tempfile.new already creates the file on disk, so this
    # passes whether or not PageDb writes anything -- consider a stronger check.
    it "creates a file" do
      expect(File.exist?(tmp.path)).to be_truthy
    end
  end

  it "handles url keys" do
    # Key deliberately contains URL punctuation (://, ?, =, &, #).
    # The "&param2" had been mis-decoded as the pilcrow character; restored.
    key = "http://the.web.site.com/path/to/cgi?param1=x&param2=y#substuff"
    contents = "a" * 555
    subject[key] = contents
    expect(subject[key]).to eq(contents)
  end

  # NOTE(review): the expectation is in insertion order (keyc, keya, keyz),
  # not sorted order -- confirm PageDb#each guarantees that ordering.
  it "supports enumerable (map)" do
    subject["keyc"] = "v"
    subject["keya"] = "v"
    subject["keyz"] = "v"

    expect(subject.map { |(n, v)| "#{n}:#{v}" }).to eq(%w(keyc:v keya:v keyz:v))
  end

end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::PageDef: the selector paths it stores and the
# title/body/link extraction it performs on a parsed page.
#
# NOTE(review): `dom` is a helper defined outside this file (presumably
# spec_helper) that turns an HTML string into a parsed document.
describe AudioBookCreator::PageDef do
  context "with no parameter" do
    subject { described_class.new }

    # Defaults asserted below: h1 / p / a, and no chapter path.
    it { expect(subject.title_path).to eq("h1") }
    it { expect(subject.body_path).to eq("p") }
    it { expect(subject.link_path).to eq("a") }
    it { expect(subject.chapter_path).to be_nil }
  end

  context "with all parameters" do
    # Positional order: title, body, link, chapter.
    subject { described_class.new("h1.title", "div", "a.link", "a.chapter") }

    it { expect(subject.title_path).to eq("h1.title") }
    it { expect(subject.body_path).to eq("div") }
    it { expect(subject.link_path).to eq("a.link") }
    it { expect(subject.chapter_path).to eq("a.chapter") }
  end

  # The groups below rely on RSpec's implicit subject (described_class.new).
  describe "#title" do
    context "with no title" do
      let(:page) { dom("<p></p>") }

      it { expect(subject.title(page)).to be_nil }
    end

    context "with title" do
      let(:page) { dom("<h1>title</h1>") }

      it { expect(subject.title(page)).to eq("title") }
    end
  end

  # NOTE: chapter uses array.join
  describe "#body" do
    context "with no body" do
      let(:page) { dom("<h1></h1>") }

      it { expect(subject.body(page)).to be_empty }
    end

    context "with body" do
      let(:page) { dom("<p>p1</p>") }

      it { expect(Array(subject.body(page)).join).to eq("p1") }
    end

    context "with many bodies" do
      let(:page) { dom("<p>p1</p><p>p2</p><p>p3</p><p>p4</p>") }

      it { expect(Array(subject.body(page)).join).to eq(%w[p1 p2 p3 p4].join) }
    end
  end

  describe "#page_links" do
    context "with no page_links" do
      let(:page) { dom("<p></p>") }

      it { expect(subject.page_links(page) { |r| r["href"] }).to be_empty }
    end

    context "with multiple page_links" do
      let(:page) { dom("<a href='tgt1'>a</a><a href='tgt2'>a</a>") }

      it { expect(subject.page_links(page) { |r| r["href"] }).to eq(%w[tgt1 tgt2]) }
    end
  end

  describe "#chapter_links" do
    before { subject.chapter_path = "a.chapter" }

    context "with no chapter_links" do
      let(:page) { dom("<p></p>") }

      it { expect(subject.chapter_links(page) { |r| r["href"] }).to be_empty }
    end

    context "with only page_links" do
      # A plain <a> without the "chapter" class must not match.
      let(:page) { dom("<p><a href='x'>x</a></p>") }

      it { expect(subject.chapter_links(page) { |r| r["href"] }).to be_empty }
    end

    context "with multiple chapter_links" do
      let(:page) { dom("<a class='chapter' href='tgt1'>a</a><a class='chapter' href='tgt2'>a</a>") }

      it { expect(subject.chapter_links(page) { |r| r["href"] }).to eq(%w[tgt1 tgt2]) }
    end

    context "with nil chapter_path" do
      # Runs after the outer `before`, so the path ends up nil.
      before { subject.chapter_path = nil }

      let(:page) { dom("<a class='chapter' href='tgt1'>a</a><a class='chapter' href='tgt2'>a</a>") }

      it { expect(subject.chapter_links(page) { |r| r["href"] }).to be_empty }
    end
  end
end
|
79
|
+
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::Runner: `run` / `run!` flatten their :params
# into string arguments for `system`, log when logging is enabled, and
# `run!` raises when the command fails.
#
# `system` is stubbed on the subject in every example, so no real command
# is executed.
describe AudioBookCreator::Runner do
  subject { described_class.new }

  context "with successful command" do
    it "runs commands with arguments" do
      expect(subject).to receive(:system).with("cmd", "arg1", "arg2").and_return(true)
      subject.run!("cmd", params: %w[arg1 arg2])
    end

    it "runs commands with non string arguments" do
      # The integer 1 is expected to reach `system` as the string "1".
      expect(subject).to receive(:system).with("cmd", "arg1", "1").and_return(true)
      subject.run!("cmd", params: ["arg1", 1])
    end

    it "runs commands with hashes and nested arrays" do
      # Hash keys and nested array values are flattened into one arg list.
      expect(subject).to receive(:system).with("cmd", "arg1", "a", "b").and_return(true)
      subject.run!("cmd", params: { arg1: %w[a b] })
    end

    it "runs commands with nils" do
      # A nil value is dropped; only the key is passed along.
      expect(subject).to receive(:system).with("cmd", "arg1").and_return(true)
      subject.run!("cmd", params: { arg1: nil })
    end

    context "without verbose" do
      it "doesnt log" do
        expect(subject).to receive(:system).and_return(true)
        subject.run!("cmd", params: %w[arg1 arg2])
        expect_to_have_logged
      end
    end

    context "with verbose" do
      before { enable_logging }

      it "logs messages" do
        expect(subject).to receive(:system).and_return(true)
        expect(subject.run!("cmd", params: %w[arg1 arg2])).to be_truthy
        expect_to_have_logged(/run: cmd arg1 arg2/, "", "", "success")
      end
    end
  end

  context "with failing command" do
    it "returns false" do
      # Non-bang `run` reports failure through its return value.
      expect(subject).to receive(:system).and_return(false)
      expect(subject.run("cmd", params: %w[arg1 arg2])).not_to be_truthy
    end

    it "raises exception" do
      expect(subject).to receive(:system).and_return(false)
      expect { subject.run!("cmd", params: %w[arg1 arg2]) }.to raise_error(/trouble/)
    end

    context "with verbose" do
      before { enable_logging }

      it "logs messages" do
        expect(subject).to receive(:system).and_return(false)
        expect { subject.run!("cmd", params: %w[arg1 arg2]) }.to raise_error(/trouble/)
        expect_to_have_logged(/run.*cmd.*arg1 arg2/, "", "", "issue")
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::SpeakerDef: the default speech/binding
# settings and the ability to override each one by keyword.
describe AudioBookCreator::SpeakerDef do
  context "with no parameters" do
    subject { described_class.new }

    # for speaking the chapter
    it { expect(subject.voice).to eq("Vicki") }
    it { expect(subject.rate).to eq(280) }

    # for binding the book
    it { expect(subject.channels).to eq(1) }
    it { expect(subject.bit_rate).to eq(32) }
    it { expect(subject.max_hours).to eq(7) }
    it { expect(subject.sample_rate).to eq(22_050) }
    it { expect(subject.regen_audio).to be_falsy }
  end

  context "with parameters" do
    # Every setting is given a non-default value.
    subject do
      described_class.new(
        voice: "Serena",
        rate: 360,
        channels: 2,
        bit_rate: 64,
        max_hours: 2,
        sample_rate: 44_100,
        regen_audio: true,
      )
    end

    # for speaking the chapter
    it { expect(subject.voice).to eq("Serena") }
    it { expect(subject.rate).to eq(360) }

    # for binding the book
    it { expect(subject.channels).to eq(2) }
    it { expect(subject.bit_rate).to eq(64) }
    it { expect(subject.max_hours).to eq(2) }
    it { expect(subject.sample_rate).to eq(44_100) }
    it { expect(subject.regen_audio).to be_truthy }
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::Speaker: writing a chapter's text file,
# invoking `say` through a Runner to produce the .m4a, skipping work when
# both files already exist, and the filename/directory helpers.
#
# File, FileUtils and Runner#system are all stubbed, so nothing touches the
# disk or spawns a process.
#
# NOTE(review): `chapter` and `spoken_chapter` are helpers defined outside
# this file (presumably spec_helper) -- confirm their defaults there.
describe AudioBookCreator::Speaker do
  let(:book_def) { AudioBookCreator::BookDef.new("dir") }
  let(:speaker_def) { AudioBookCreator::SpeakerDef.new(regen_audio: false) }
  subject { described_class.new(speaker_def, book_def) }

  it "should require a non empty chapter" do
    expect_runner.not_to receive(:system)
    # NOTE(review): bare `raise_error` accepts any exception; consider
    # pinning the expected message once it is confirmed.
    expect { subject.say(chapter(nil)) }.to raise_error
  end

  it "should do nothing if txt and mp4 file exist" do
    expect(File).to receive(:exist?).with("dir/chapter01.txt").and_return(true)
    expect(File).to receive(:exist?).with("dir/chapter01.m4a").and_return(true)

    expect(File).not_to receive(:write)
    expect_runner.not_to receive(:system)
    expect(subject.say(chapter)).to eq(spoken_chapter("the title", "dir/chapter01.m4a"))
  end

  it "should create text and mp4 file" do
    expect(File).to receive(:exist?).twice.and_return(false)
    expect(File).to receive(:write).with("dir/chapter01.txt", "the title\n\ncontent\n")

    # Voice and rate come from the SpeakerDef defaults.
    expect_runner.to receive(:system)
      .with("say", "-v", "Vicki", "-r", "280", "-f", "dir/chapter01.txt", "-o", "dir/chapter01.m4a")
      .and_return(true)
    subject.say(chapter)
  end

  it "doesnt print if not verbose" do
    expect(File).to receive(:exist?).twice.and_return(false)
    expect(File).to receive(:write)

    expect_runner.to receive(:system).and_return(true)
    subject.say(chapter)
    expect_to_have_logged
  end

  it "should output messages if set to verbose" do
    enable_logging
    expect(File).to receive(:exist?).twice.and_return(false)
    expect(File).to receive(:write)

    expect_runner.to receive(:system).and_return(true)
    subject.say(chapter)
    expect_to_have_logged(/^run:/, "", "", "success")
  end

  context "with force" do
    before { speaker_def.regen_audio = true }

    subject { described_class.new(speaker_def, book_def) }

    it "should create text and mp4 file if they exist but are set to force" do
      # With regen_audio on, existence is not even checked.
      expect(File).not_to receive(:exist?)
      expect(File).to receive(:write)

      expect_runner.to receive(:system).and_return(true)
      subject.say(chapter)
    end
  end

  it "should freak if no chapters are passed in" do
    expect_runner.not_to receive(:system)
    expect { subject.say([]) }.to raise_error("Empty chapter")
  end

  context "#make_directory_structure" do
    it "should create base directory" do
      expect_runner.not_to receive(:system)
      expect(File).to receive(:exist?).with(subject.book_def.base_dir).and_return(false)
      expect(FileUtils).to receive(:mkdir).with(subject.book_def.base_dir)
      subject.make_directory_structure
    end

    it "should not create base directory if it exists" do
      expect_runner.not_to receive(:system)
      expect(File).to receive(:exist?).with(subject.book_def.base_dir).and_return(true)
      expect(FileUtils).not_to receive(:mkdir)
      subject.make_directory_structure
    end
  end

  context "#chapter_text_filename" do
    let(:chapter) { AudioBookCreator::Chapter.new(number: 3) }

    it do
      expect_runner.not_to receive(:system)
      expect(subject.chapter_text_filename(chapter)).to eq("dir/chapter03.txt")
    end
  end

  context "#chapter_sound_filename" do
    let(:chapter) { AudioBookCreator::Chapter.new(number: 2) }

    it do
      expect_runner.not_to receive(:system)
      expect(subject.chapter_sound_filename(chapter)).to eq("dir/chapter02.m4a")
    end
  end

  private

  # Expectation target covering any Runner instance, used to stub or forbid
  # the `system` call made while speaking a chapter.
  def expect_runner
    expect_any_instance_of(AudioBookCreator::Runner)
  end
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for AudioBookCreator::Spider: starting from one or more URLs it
# fetches pages through `web`, follows the links selected by the PageDef,
# visits each page once, and consults `invalid_urls` before following.
#
# `web` is a plain Hash whose `[]` is stubbed per page by
# `expect_visit_page`; each stub also asserts the page is requested.
#
# NOTE(review): `page`, `site`, `uri` and `link` are helpers defined outside
# this file (presumably spec_helper) -- confirm their behaviour there.
describe AudioBookCreator::Spider do
  # set a max to prevent errors from causing infinite loops
  # NOTE(review): no max is visible in this spec -- comment may be stale.
  let(:page_def) { AudioBookCreator::PageDef.new("h1", "p", "a.page", "a.chapter") }
  let(:web) { {} }
  let(:invalid_urls) { {} }
  # NOTE: could use arrays here, but put caps to catch bugs
  subject { described_class.new(page_def, web, invalid_urls) }

  it "handles empty initializer" do
    pristine = described_class.new(page_def)
    expect(pristine.web).to be_a(Hash)
    expect(pristine.invalid_urls).to be_a(Hash)
  end

  it "sets arguments" do
    expect(subject.page_def).to eq(page_def)
    expect(subject.web).to eq(web)
    expect(subject.invalid_urls).not_to be_nil
  end

  context "#visit" do
    it "visit urls" do
      expect_visit_page("page1", "x")
      expect(subject.run(uri(%w[page1]))).to eq([page(site("page1"), "x")])
    end

    it "visit string" do
      expect_visit_page("page1", "x")
      expect(subject.run(site(%w[page1]))).to eq([page(site("page1"), "x")])
    end

    it "visit multiple pages" do
      expect_visit_page("page1")
      expect_visit_page("page2")
      expect(subject.run(uri(%w[page1 page2])))
        .to eq([page(site("page1")), page(site("page2"))])
    end

    it "visit unique list of pages" do
      # page2 is linked twice but must only be fetched once.
      expect_visit_page("page1", link("page2"), link("page2"))
      expect_visit_page("page2")
      expect(subject.run(uri(%w[page1])))
        .to eq([page(site("page1"), link("page2"), link("page2")), page(site("page2"))])
    end

    it "skips loops from uri" do
      expect_visit_page("page1", link("page1"))
      subject.run(uri(%w[page1]))
    end

    it "skips loops from string" do
      expect_visit_page("page1", link("page1"))
      subject.run(site(%w[page1]))
    end

    it "also accepts string urls" do
      expect_visit_page("page1")
      subject.run(site(%w[page1]))
    end

    it "skips empty urls" do
      # An anchor with no href at all.
      expect_visit_page("page1", "<a>x</a>")
      subject.run(site(%w[page1]))
    end

    it "skips blank urls" do
      # An anchor whose href is the empty string.
      expect_visit_page("page1", "<a href=\"\">x</a>")
      subject.run(site(%w[page1]))
    end
  end

  it "follows relative links" do
    expect_visit_page("page1", link("page2"))
    expect_visit_page("page2")
    subject.run(uri(%w[page1]))
  end

  it "follows absolute links" do
    expect_visit_page("page1", link(site("page2")))
    expect_visit_page("page2")
    subject.run(uri(%w[page1]))
  end

  # in the end of the day, these links reference the local page which is ignored, so no code necessary
  it "skips empty, blank, and local ref links" do
    p1_contents = ["<a id='a1'>a1</a>", "<a href=''>a2</a>", "<a href='#a'>x</a>", link("page2")]
    expect_visit_page("page1", *p1_contents)
    expect_visit_page("page2")
    expect(subject.run(uri(%w[page1]))).to eq([page(site("page1"), *p1_contents), page(site("page2"))])
  end

  it "visits all pages once (and only once)" do
    # Fully cross-linked pages; each stub allows exactly one fetch.
    expect_visit_page("page1", link("page2"))
    expect_visit_page("page2", link("page1"), link("page3"))
    expect_visit_page("page3", link("page1"), link("page2"))
    subject.run(uri(%w[page1]))
  end

  it "visits all chapters once (and only once)" do
    expect_visit_page("page1", link("page2", "chapter"))
    expect_visit_page("page2", link("page1", "chapter"), link("page3", "chapter"))
    expect_visit_page("page3", link("page1", "chapter"), link("page2", "chapter"))
    subject.run(uri(%w[page1]))
  end

  it "visits chapters too" do
    expect_visit_page("page1", link("page2", "chapter"))
    expect_visit_page("page2")
    subject.run(uri(%w[page1]))
  end

  it "leverages page_def to determine good links" do
    # Only links under .good match; "bad" has no stub, so it must not be fetched.
    page_def.link_path = ".good a"
    expect_visit_page("page1", "<div class='good'>", link("good"), "</div>", link("bad"))
    expect_visit_page("good")
    subject.run(uri(%w[page1]))
  end

  it "ignores #target in url" do
    expect_visit_page("page1", link("page1#target"))
    subject.run(uri(%w[page1]))
  end

  # NOTE(review): despite the name, the assertion is that a malformed link
  # raises a "bad URI" error rather than being skipped.
  it "skips bad urls" do
    expect_visit_page("page1", link("%@")) # it never gets to call a second time
    expect { subject.run(uri(%w[page1])) }.to raise_error(/bad URI/)
  end

  it "uses url filter for pages" do
    # An error raised by the filter is expected to propagate out of `run`.
    expect(invalid_urls).to receive(:include?).with(URI.parse(site("x.pdf"))).and_raise("bad file extension")
    expect_visit_page("page1", link("x.pdf"))
    expect { subject.run(uri(%w[page1])) }.to raise_error("bad file extension")
  end

  it "uses url filter for chapters" do
    expect(invalid_urls).to receive(:include?).with(URI.parse(site("x.pdf"))).and_raise("bad file extension")
    expect_visit_page("page1", link("x.pdf", "chapter"))
    expect { subject.run(uri(%w[page1])) }.to raise_error("bad file extension")
  end

  context "with invalid_urls" do
    it "skips invalid_urls" do
      # The filter reports "bad" as invalid, so only page1 is fetched.
      expect(subject.invalid_urls).to receive(:include?).with(uri("bad")).and_return(true)
      expect_visit_page("page1", link("bad"))
      subject.run(uri(%w[page1]))
    end
  end

  context "logging" do
    it "logs page visits" do
      enable_logging
      expect_visit_page("page1")
      subject.run(uri(%w[page1]))
      expect_to_have_logged("visit #{uri("page1")}")
    end

    it "doesnt log page visits" do
      expect_visit_page("page1")
      subject.run(uri(%w[page1]))
      expect_to_have_logged
    end
  end

  private

  # Stubs `web[<full url>]` to return a page built from +args+ and, being a
  # message expectation, also requires that the lookup actually happens.
  def expect_visit_page(url, *args)
    url = site(url)
    expect(web).to receive(:[]).with(url.to_s).and_return(page(url, *args))
  end
end
|