audio_book_creator 0.0.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +2 -2
- data/CHANGELOG.md +24 -0
- data/Gemfile +3 -3
- data/README.md +9 -4
- data/audio_book_creator.gemspec +3 -3
- data/{bin → exe}/audio_book_creator +3 -0
- data/lib/audio_book_creator.rb +4 -2
- data/lib/audio_book_creator/binder.rb +2 -1
- data/lib/audio_book_creator/book_def.rb +2 -2
- data/lib/audio_book_creator/cached_hash.rb +1 -1
- data/lib/audio_book_creator/cascading_array.rb +8 -8
- data/lib/audio_book_creator/chapter.rb +1 -1
- data/lib/audio_book_creator/cli.rb +36 -29
- data/lib/audio_book_creator/conductor.rb +5 -3
- data/lib/audio_book_creator/defaulter.rb +41 -0
- data/lib/audio_book_creator/editor.rb +2 -3
- data/lib/audio_book_creator/page_db.rb +14 -8
- data/lib/audio_book_creator/page_def.rb +7 -15
- data/lib/audio_book_creator/runner.rb +5 -3
- data/lib/audio_book_creator/speaker.rb +1 -1
- data/lib/audio_book_creator/spider.rb +9 -28
- data/lib/audio_book_creator/surfer_def.rb +1 -5
- data/lib/audio_book_creator/url_filter.rb +1 -1
- data/lib/audio_book_creator/version.rb +1 -1
- data/lib/audio_book_creator/web_page.rb +49 -0
- data/run_mutant +89 -0
- data/spec/audio_book_creator/binder_spec.rb +3 -3
- data/spec/audio_book_creator/book_creator_spec.rb +2 -3
- data/spec/audio_book_creator/book_def_spec.rb +33 -22
- data/spec/audio_book_creator/cached_hash_spec.rb +4 -0
- data/spec/audio_book_creator/cli_spec.rb +189 -122
- data/spec/audio_book_creator/conductor_spec.rb +17 -6
- data/spec/audio_book_creator/defaulter_spec.rb +154 -0
- data/spec/audio_book_creator/editor_spec.rb +7 -7
- data/spec/audio_book_creator/page_db_spec.rb +73 -11
- data/spec/audio_book_creator/page_def_spec.rb +26 -40
- data/spec/audio_book_creator/speaker_spec.rb +2 -2
- data/spec/audio_book_creator/spider_spec.rb +10 -15
- data/spec/audio_book_creator/surfer_def_spec.rb +1 -4
- data/spec/audio_book_creator/url_filter_spec.rb +1 -1
- data/spec/audio_book_creator/web_page_spec.rb +65 -0
- data/spec/audio_book_creator_spec.rb +23 -0
- data/spec/spec_helper.rb +15 -12
- metadata +14 -20
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe AudioBookCreator::Defaulter do
|
4
|
+
# sample to use for settings
|
5
|
+
let(:all_settings) do
|
6
|
+
{
|
7
|
+
"www.host.com" => {
|
8
|
+
:title_path => "h1.host",
|
9
|
+
:body_path => "div.host",
|
10
|
+
:link_path => "a.host",
|
11
|
+
:chapter_path => "a.chapter.host",
|
12
|
+
}
|
13
|
+
}
|
14
|
+
end
|
15
|
+
|
16
|
+
let(:settings) { {} }
|
17
|
+
let(:page_def) { AudioBookCreator::PageDef.new("h1.title", "div", "a.link", "a.chapter") }
|
18
|
+
let(:book_def) do
|
19
|
+
AudioBookCreator::BookDef.new("the title", "author", "dir", %w(a b), true).tap do |bd|
|
20
|
+
bd.urls = %w(http://www.host.com/)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
subject { described_class.new(page_def, book_def).tap { |d| d.settings = settings } }
|
24
|
+
|
25
|
+
describe "#initialize" do
|
26
|
+
it { expect(subject.page_def).to eq(page_def) }
|
27
|
+
it { expect(subject.book_def).to eq(book_def) }
|
28
|
+
end
|
29
|
+
|
30
|
+
describe "#host" do
|
31
|
+
it "supports empty url" do
|
32
|
+
book_def.urls = %w()
|
33
|
+
expect(subject.host).to be_nil
|
34
|
+
end
|
35
|
+
|
36
|
+
it { expect(subject.host).to eq("www.host.com") }
|
37
|
+
end
|
38
|
+
|
39
|
+
describe "#settings" do
|
40
|
+
# clear out settings so Defaulter can actually set it up correctly
|
41
|
+
before { subject.settings = nil }
|
42
|
+
|
43
|
+
it "sets filename" do
|
44
|
+
expect(subject.settings.filename).to eq("settings.db")
|
45
|
+
end
|
46
|
+
|
47
|
+
it "sets table_name" do
|
48
|
+
expect(subject.settings.table_name).to eq("settings")
|
49
|
+
end
|
50
|
+
|
51
|
+
it "sets encode" do
|
52
|
+
expect(subject.settings.encode).to eq(true)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# settings => page_defs
|
57
|
+
describe "#load_unset_values" do
|
58
|
+
it "skips on nil" do
|
59
|
+
subject.settings = 1 # ensure this is not accessed (because host is empty)
|
60
|
+
book_def.urls = []
|
61
|
+
subject.load_unset_values
|
62
|
+
expect_page_def("h1.title", "div", "a.link", "a.chapter")
|
63
|
+
end
|
64
|
+
|
65
|
+
it "skips unknown hosts" do
|
66
|
+
subject.settings = {"host2.com" => {:title_path => "h1.host2"}}
|
67
|
+
subject.load_unset_values
|
68
|
+
expect_page_def("h1.title", "div", "a.link", "a.chapter")
|
69
|
+
end
|
70
|
+
|
71
|
+
it "uses a) hostname to b) set partial values" do
|
72
|
+
subject.settings = {"www.host.com" => {:title_path => "h1.host"}}
|
73
|
+
book_def.urls = %w(http://www.host.com/abc)
|
74
|
+
subject.load_unset_values
|
75
|
+
expect_page_def("h1.host", "div", "a.link", "a.chapter")
|
76
|
+
end
|
77
|
+
|
78
|
+
it "sets all values" do
|
79
|
+
subject.settings = all_settings
|
80
|
+
subject.load_unset_values
|
81
|
+
expect_page_def("h1.host", "div.host", "a.host", "a.chapter.host")
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# page_defs => settings
|
86
|
+
describe "#store" do
|
87
|
+
let(:settings) { all_settings }
|
88
|
+
let(:page_def) { AudioBookCreator::PageDef.new(nil, nil, nil, nil) }
|
89
|
+
|
90
|
+
it "skips on empty url" do
|
91
|
+
subject.settings = 1 # ensure this is not accessed (because host is empty)
|
92
|
+
book_def.urls = []
|
93
|
+
expect { subject.store }.not_to raise_error
|
94
|
+
end
|
95
|
+
|
96
|
+
context "with unknown host" do
|
97
|
+
let(:settings) { {} }
|
98
|
+
|
99
|
+
it "adds settings" do
|
100
|
+
# no settings for this host
|
101
|
+
page_def.title_path = "h1.changed"
|
102
|
+
subject.store
|
103
|
+
expect_settings("h1.changed")
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
it "updates only overridden values" do
|
108
|
+
page_def.title_path = "h1.changed"
|
109
|
+
subject.store
|
110
|
+
expect_settings("h1.changed", "div.host", "a.host", "a.chapter.host")
|
111
|
+
end
|
112
|
+
|
113
|
+
it "sets all values" do
|
114
|
+
page_def.title_path = "h1.changed"
|
115
|
+
page_def.body_path = "div.changed"
|
116
|
+
page_def.link_path = "a.changed"
|
117
|
+
page_def.chapter_path = "a.chapter.changed"
|
118
|
+
subject.store
|
119
|
+
expect_settings("h1.changed", "div.changed", "a.changed", "a.chapter.changed")
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def expect_page_def(title_path, body_path, link_path, chapter_path)
|
124
|
+
expect(page_def.title_path).to eq(title_path)
|
125
|
+
expect(page_def.body_path).to eq(body_path)
|
126
|
+
expect(page_def.link_path).to eq(link_path)
|
127
|
+
expect(page_def.chapter_path).to eq(chapter_path)
|
128
|
+
end
|
129
|
+
|
130
|
+
def expect_settings(title_path = nil, body_path = nil, link_path = nil, chapter_path = nil)
|
131
|
+
value = settings[subject.host]
|
132
|
+
expect(value).not_to be_nil
|
133
|
+
if title_path
|
134
|
+
expect(value[:title_path]).to eq(title_path)
|
135
|
+
else
|
136
|
+
expect(value).not_to have_key(:title_path)
|
137
|
+
end
|
138
|
+
if body_path
|
139
|
+
expect(value[:body_path]).to eq(body_path)
|
140
|
+
else
|
141
|
+
expect(value).not_to have_key(:body_path)
|
142
|
+
end
|
143
|
+
if link_path
|
144
|
+
expect(value[:link_path]).to eq(link_path)
|
145
|
+
else
|
146
|
+
expect(value).not_to have_key(:link_path)
|
147
|
+
end
|
148
|
+
if chapter_path
|
149
|
+
expect(value[:chapter_path]).to eq(chapter_path)
|
150
|
+
else
|
151
|
+
expect(value).not_to have_key(:chapter_path)
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
@@ -5,7 +5,7 @@ describe AudioBookCreator::Editor do
|
|
5
5
|
subject { described_class.new(page_def) }
|
6
6
|
let(:chapter1) { chapter("first\n\nsecond", "the title") }
|
7
7
|
it "should generate a page" do
|
8
|
-
expect(subject.parse([
|
8
|
+
expect(subject.parse([web_page(uri("page1"), "page1", "<h1>the title</h1>",
|
9
9
|
"<p>first</p>", "<p>second</p>")
|
10
10
|
])).to eq([chapter1])
|
11
11
|
end
|
@@ -13,27 +13,27 @@ describe AudioBookCreator::Editor do
|
|
13
13
|
it "should respect content path" do
|
14
14
|
page_def.title_path = "h3"
|
15
15
|
page_def.body_path = "#story p"
|
16
|
-
expect(subject.parse([
|
16
|
+
expect(subject.parse([web_page(uri("page1"), "page1", "<h3>the title</h3>",
|
17
17
|
"<div id='story'>", "<p>first</p>", "<p>second</p>", "</div>",
|
18
18
|
"<p>bad</p>")
|
19
19
|
])).to eq([chapter1])
|
20
20
|
end
|
21
21
|
|
22
22
|
it "should ignore body formatting" do
|
23
|
-
expect(subject.parse([
|
23
|
+
expect(subject.parse([web_page(uri("page1"), "page1", "<h1>the title</h1>",
|
24
24
|
"<p><a href='#this'>first</a></p>", "<p><b>second</b></p>")
|
25
25
|
])).to eq([chapter1])
|
26
26
|
end
|
27
27
|
|
28
28
|
it "should parse multiple pages" do
|
29
|
-
expect(subject.parse([
|
30
|
-
|
29
|
+
expect(subject.parse([web_page(uri("page1"), "page1", "<h1>p1</h1>", "<p>first</p>"),
|
30
|
+
web_page(uri("page2"), "page2", "<h1>p2</h1>", "<p>second</p>"),
|
31
31
|
])).to eq([chapter("first", "p1", 1), chapter("second", "p2", 2)])
|
32
32
|
end
|
33
33
|
|
34
34
|
it "should default the title if none found" do
|
35
|
-
expect(subject.parse([
|
36
|
-
|
35
|
+
expect(subject.parse([web_page(uri("page1"), "page1", "<p>first</p>"),
|
36
|
+
web_page(uri("page2"), "page2", "<p>second</p>"),
|
37
37
|
])).to eq([chapter("first", "Chapter 1", 1), chapter("second", "Chapter 2", 2)])
|
38
38
|
end
|
39
39
|
end
|
@@ -2,11 +2,12 @@ require "spec_helper"
|
|
2
2
|
require 'tempfile'
|
3
3
|
|
4
4
|
describe AudioBookCreator::PageDb do
|
5
|
-
subject {
|
5
|
+
subject { standard_db }
|
6
6
|
|
7
7
|
# all of these tests are in memory
|
8
8
|
# this is the only test that depends upon it
|
9
9
|
context "with memory databases" do
|
10
|
+
subject { standard_db(":memory:")}
|
10
11
|
it "does not create a file" do
|
11
12
|
# access key to trigger database creation
|
12
13
|
subject["key"]
|
@@ -14,30 +15,84 @@ describe AudioBookCreator::PageDb do
|
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
18
|
+
describe "#initialize" do
|
19
|
+
subject { described_class.new(":memory:", "tablename", true) }
|
20
|
+
it { expect(subject.filename).to eq(":memory:") }
|
21
|
+
it { expect(subject.table_name).to eq("tablename") }
|
22
|
+
it { expect(subject.encode).to eq(true) }
|
23
|
+
end
|
24
|
+
|
17
25
|
it "works" do
|
18
26
|
expect(subject).not_to be_nil
|
19
27
|
end
|
20
28
|
|
21
|
-
|
22
|
-
|
29
|
+
describe "#[]" do
|
30
|
+
it "finds value" do
|
31
|
+
subject["key"] = "value"
|
32
|
+
|
33
|
+
expect(subject["key"]).to eq("value")
|
34
|
+
end
|
35
|
+
|
36
|
+
it "finds nothing" do
|
37
|
+
expect(subject["key"]).to be_nil
|
38
|
+
end
|
39
|
+
|
40
|
+
context "with encoding db" do
|
41
|
+
subject { encoded_db }
|
42
|
+
it "finds hashes" do
|
43
|
+
subject["key"] = {:name => "value"}
|
23
44
|
|
24
|
-
|
45
|
+
expect(subject["key"]).to eq({:name => "value"})
|
46
|
+
end
|
47
|
+
end
|
25
48
|
end
|
26
49
|
|
27
|
-
|
28
|
-
|
29
|
-
|
50
|
+
describe "#[]=" do
|
51
|
+
it "sets nils" do
|
52
|
+
subject["key"] = nil
|
53
|
+
|
54
|
+
expect(subject["key"]).to eq(nil)
|
55
|
+
end
|
56
|
+
|
57
|
+
it "sets value" do
|
58
|
+
subject["key"] = "value"
|
59
|
+
|
60
|
+
expect(subject["key"]).to eq("value")
|
61
|
+
end
|
62
|
+
|
63
|
+
context "with encoding db" do
|
64
|
+
subject { encoded_db }
|
65
|
+
|
66
|
+
it "sets nils" do
|
67
|
+
subject["key"] = nil
|
68
|
+
|
69
|
+
expect(subject["key"]).to eq(nil)
|
70
|
+
end
|
71
|
+
|
72
|
+
it "sets hashes" do
|
73
|
+
subject["key"] = {:name => "value"}
|
74
|
+
|
75
|
+
expect(subject["key"]).to eq({:name => "value"})
|
76
|
+
end
|
77
|
+
end
|
30
78
|
end
|
31
79
|
|
32
|
-
|
33
|
-
|
80
|
+
describe "#include?" do
|
81
|
+
it "includes good key" do
|
82
|
+
subject["key"] = "value"
|
83
|
+
expect(subject).to include("key")
|
84
|
+
end
|
85
|
+
|
86
|
+
it "doesn't include bad key" do
|
87
|
+
expect(subject).not_to include("key")
|
88
|
+
end
|
34
89
|
end
|
35
90
|
|
36
91
|
context "with prepopulated (file) database" do
|
37
92
|
let(:tmp) { Tempfile.new("db") }
|
38
93
|
|
39
94
|
before do
|
40
|
-
db =
|
95
|
+
db = standard_db(tmp.path)
|
41
96
|
db["key"] = "value"
|
42
97
|
end
|
43
98
|
|
@@ -47,7 +102,7 @@ describe AudioBookCreator::PageDb do
|
|
47
102
|
end
|
48
103
|
|
49
104
|
it "finds entry in previously created cache" do
|
50
|
-
db =
|
105
|
+
db = standard_db(tmp.path)
|
51
106
|
expect(db["key"]).to eq("value")
|
52
107
|
end
|
53
108
|
|
@@ -71,4 +126,11 @@ describe AudioBookCreator::PageDb do
|
|
71
126
|
expect(subject.map { |(n, v)| "#{n}:#{v}" }).to eq(%w(keyc:v keya:v keyz:v))
|
72
127
|
end
|
73
128
|
|
129
|
+
def standard_db(filename = ":memory:")
|
130
|
+
described_class.new(filename, "pages", false)
|
131
|
+
end
|
132
|
+
|
133
|
+
def encoded_db(filename = ":memory:")
|
134
|
+
described_class.new(filename, "settings", true)
|
135
|
+
end
|
74
136
|
end
|
@@ -10,69 +10,55 @@ describe AudioBookCreator::PageDef do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
context "with all parameters" do
|
13
|
-
subject { described_class.new("h1.title", "div", "a.link", "a.chapter") }
|
13
|
+
subject { described_class.new("h1.title", "div", "a.link", "a.chapter", {:url => true}) }
|
14
14
|
it { expect(subject.title_path).to eq("h1.title") }
|
15
15
|
it { expect(subject.body_path).to eq("div") }
|
16
16
|
it { expect(subject.link_path).to eq("a.link") }
|
17
17
|
it { expect(subject.chapter_path).to eq("a.chapter") }
|
18
|
-
|
19
|
-
|
20
|
-
describe "#title" do
|
21
|
-
context "with no title" do
|
22
|
-
let(:page) { dom("<p></p>")}
|
23
|
-
it { expect(subject.title(page)).to be_nil}
|
24
|
-
end
|
25
|
-
context "with title" do
|
26
|
-
let(:page) { dom("<h1>title</h1>")}
|
27
|
-
it { expect(subject.title(page)).to eq("title")}
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
# NOTE: chapter uses array.join
|
32
|
-
describe "#body" do
|
33
|
-
context "with no body" do
|
34
|
-
let(:page) { dom("<h1></h1>")}
|
35
|
-
it { expect(subject.body(page)).to be_empty}
|
36
|
-
end
|
37
|
-
context "with body" do
|
38
|
-
let(:page) { dom("<p>p1</p>")}
|
39
|
-
it { expect(Array(subject.body(page)).join).to eq("p1") }
|
40
|
-
end
|
41
|
-
context "with many bodies" do
|
42
|
-
let(:page) { dom("<p>p1</p><p>p2</p><p>p3</p><p>p4</p>")}
|
43
|
-
it { expect(Array(subject.body(page)).join).to eq(%w(p1 p2 p3 p4).join) }
|
44
|
-
end
|
18
|
+
it { expect(subject.invalid_urls).to eq({:url => true}) }
|
45
19
|
end
|
46
20
|
|
47
21
|
describe "#page_links" do
|
22
|
+
let(:root) { uri("") }
|
48
23
|
context "with no page_links" do
|
49
|
-
let(:
|
50
|
-
it { expect(subject.page_links(
|
24
|
+
let(:wp) { web_page(root, "title","<p></p>")}
|
25
|
+
it { expect(subject.page_links(wp)).to be_empty}
|
51
26
|
end
|
52
27
|
context "with multiple page_links" do
|
53
|
-
let(:
|
54
|
-
it { expect(subject.page_links(
|
28
|
+
let(:wp) { web_page(root, "title", "<a href='tgt1'>a</a><a href='tgt2'>a</a>")}
|
29
|
+
it { expect(subject.page_links(wp)).to eq(uri(%w(tgt1 tgt2))) }
|
30
|
+
end
|
31
|
+
context "with bad page_links" do
|
32
|
+
before { subject.invalid_urls = {uri("bad") => false}}
|
33
|
+
let(:wp) { web_page(root, "title", "<a href='tgt1'>a</a><a href='bad'>a</a>")}
|
34
|
+
it { expect(subject.page_links(wp)).to eq(uri(%w(tgt1))) }
|
55
35
|
end
|
56
36
|
end
|
57
37
|
|
58
38
|
describe "#chapter_links" do
|
39
|
+
let(:root) { uri("") }
|
59
40
|
before { subject.chapter_path = "a.chapter"}
|
60
41
|
context "with no chapter_links" do
|
61
|
-
let(:
|
62
|
-
it { expect(subject.chapter_links(
|
42
|
+
let(:wp) { web_page(root, "title","<p></p>")}
|
43
|
+
it { expect(subject.chapter_links(wp)).to be_empty }
|
63
44
|
end
|
64
45
|
context "with only page_links" do
|
65
|
-
let(:
|
66
|
-
it { expect(subject.chapter_links(
|
46
|
+
let(:wp) { web_page(root, "title", "<p><a href='x'>x</a></p>")}
|
47
|
+
it { expect(subject.chapter_links(wp)).to be_empty }
|
67
48
|
end
|
68
49
|
context "with multiple chapter_links" do
|
69
|
-
let(:
|
70
|
-
it { expect(subject.chapter_links(
|
50
|
+
let(:wp) { web_page(root, "title", "<a class='chapter' href='tgt1'>a</a><a class='chapter' href='tgt2'>a</a>") }
|
51
|
+
it { expect(subject.chapter_links(wp)).to eq(uri(%w(tgt1 tgt2))) }
|
71
52
|
end
|
72
53
|
context "with nil chapter_path" do
|
73
54
|
before { subject.chapter_path = nil }
|
74
|
-
let(:
|
75
|
-
it { expect(subject.chapter_links(
|
55
|
+
let(:wp) { web_page(root, "title", "<a class='chapter' href='tgt1'>a</a><a class='chapter' href='tgt2'>a</a>") }
|
56
|
+
it { expect(subject.chapter_links(wp)).to be_empty }
|
57
|
+
end
|
58
|
+
context "with bad chapter_links" do
|
59
|
+
before { subject.invalid_urls = {uri("bad") => false}}
|
60
|
+
let(:wp) { web_page(root, "title", "<a class='chapter' href='tgt1'>a</a><a class='chapter' href='bad'>a</a>")}
|
61
|
+
it { expect(subject.chapter_links(wp)).to eq(uri(%w(tgt1))) }
|
76
62
|
end
|
77
63
|
end
|
78
64
|
end
|
@@ -6,7 +6,7 @@ describe AudioBookCreator::Speaker do
|
|
6
6
|
subject { described_class.new(speaker_def, book_def) }
|
7
7
|
it "should require a non empty chapter" do
|
8
8
|
expect_runner.not_to receive(:system)
|
9
|
-
expect { subject.say(chapter(nil)) }.to raise_error
|
9
|
+
expect { subject.say(chapter(nil)) }.to raise_error("Empty Chapter")
|
10
10
|
end
|
11
11
|
|
12
12
|
it "should do nothing if txt and mp4 file exist" do
|
@@ -61,7 +61,7 @@ describe AudioBookCreator::Speaker do
|
|
61
61
|
|
62
62
|
it "should freak if no chapters are passed in" do
|
63
63
|
expect_runner.not_to receive(:system)
|
64
|
-
expect { subject.say([]) }.to raise_error("Empty
|
64
|
+
expect { subject.say([]) }.to raise_error("Empty Chapter")
|
65
65
|
end
|
66
66
|
|
67
67
|
context "#make_directory_structure" do
|
@@ -2,47 +2,40 @@ require "spec_helper"
|
|
2
2
|
|
3
3
|
describe AudioBookCreator::Spider do
|
4
4
|
# set a max to prevent errors from causing infinite loops
|
5
|
-
let(:page_def) { AudioBookCreator::PageDef.new("h1", "p", "a.page", "a.chapter") }
|
5
|
+
let(:page_def) { AudioBookCreator::PageDef.new("h1", "p", "a.page", "a.chapter", invalid_urls) }
|
6
6
|
let(:web) { {} }
|
7
7
|
let(:invalid_urls) { {} }
|
8
8
|
# NOTE: could use arrays here, but put caps to catch bugs
|
9
|
-
subject { described_class.new(page_def, web
|
10
|
-
|
11
|
-
it "handles empty initializer" do
|
12
|
-
pristine = described_class.new(page_def)
|
13
|
-
expect(pristine.web).to be_a(Hash)
|
14
|
-
expect(pristine.invalid_urls).to be_a(Hash)
|
15
|
-
end
|
9
|
+
subject { described_class.new(page_def, web) }
|
16
10
|
|
17
11
|
it "sets arguments" do
|
18
12
|
expect(subject.page_def).to eq(page_def)
|
19
13
|
expect(subject.web).to eq(web)
|
20
|
-
expect(subject.invalid_urls).not_to be_nil
|
21
14
|
end
|
22
15
|
|
23
16
|
context "#visit" do
|
24
17
|
it "visit urls" do
|
25
18
|
expect_visit_page "page1", "x"
|
26
|
-
expect(subject.run(uri(%w(page1)))).to eq([
|
19
|
+
expect(subject.run(uri(%w(page1)))).to eq([web_page(uri("page1"), site("page1"), "x")])
|
27
20
|
end
|
28
21
|
|
29
22
|
it "visit string" do
|
30
23
|
expect_visit_page "page1", "x"
|
31
|
-
expect(subject.run(site(%w(page1)))).to eq([
|
24
|
+
expect(subject.run(site(%w(page1)))).to eq([web_page(uri("page1"), site("page1"), "x")])
|
32
25
|
end
|
33
26
|
|
34
27
|
it "visit multiple pages" do
|
35
28
|
expect_visit_page "page1"
|
36
29
|
expect_visit_page "page2"
|
37
30
|
expect(subject.run(uri(%w(page1 page2))))
|
38
|
-
.to eq([
|
31
|
+
.to eq([web_page(uri("page1"), site("page1")), web_page(uri("page2"), site("page2"))])
|
39
32
|
end
|
40
33
|
|
41
34
|
it "visit unique list of pages" do
|
42
35
|
expect_visit_page "page1", link("page2"), link("page2")
|
43
36
|
expect_visit_page "page2"
|
44
37
|
expect(subject.run uri(%w(page1)))
|
45
|
-
.to eq([
|
38
|
+
.to eq([web_page(uri("page1"), site("page1"),link("page2"), link("page2")), web_page(uri("page2"), site("page2"))])
|
46
39
|
end
|
47
40
|
|
48
41
|
it "skips loops from uri" do
|
@@ -88,7 +81,9 @@ describe AudioBookCreator::Spider do
|
|
88
81
|
p1_contents = "<a id='a1'>a1</a>", "<a href=''>a2</a>", "<a href='#a'>x</a>", link("page2")
|
89
82
|
expect_visit_page("page1", *p1_contents)
|
90
83
|
expect_visit_page("page2")
|
91
|
-
expect(subject.run uri(%w(page1))).to eq([
|
84
|
+
expect(subject.run uri(%w(page1))).to eq([
|
85
|
+
web_page(uri("page1"), site("page1"), *p1_contents),
|
86
|
+
web_page(uri("page2"), site("page2"))])
|
92
87
|
end
|
93
88
|
|
94
89
|
it "visits all pages once (and only once)" do
|
@@ -142,7 +137,7 @@ describe AudioBookCreator::Spider do
|
|
142
137
|
|
143
138
|
context "with invalid_urls" do
|
144
139
|
it "skips invalid_urls" do
|
145
|
-
expect(
|
140
|
+
expect(invalid_urls).to receive(:include?).with(uri("bad")).and_return(true)
|
146
141
|
expect_visit_page("page1", link("bad"))
|
147
142
|
subject.run uri(%w(page1))
|
148
143
|
end
|