audio_book_creator 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +2 -2
- data/CHANGELOG.md +24 -0
- data/Gemfile +3 -3
- data/README.md +9 -4
- data/audio_book_creator.gemspec +3 -3
- data/{bin → exe}/audio_book_creator +3 -0
- data/lib/audio_book_creator.rb +4 -2
- data/lib/audio_book_creator/binder.rb +2 -1
- data/lib/audio_book_creator/book_def.rb +2 -2
- data/lib/audio_book_creator/cached_hash.rb +1 -1
- data/lib/audio_book_creator/cascading_array.rb +8 -8
- data/lib/audio_book_creator/chapter.rb +1 -1
- data/lib/audio_book_creator/cli.rb +36 -29
- data/lib/audio_book_creator/conductor.rb +5 -3
- data/lib/audio_book_creator/defaulter.rb +41 -0
- data/lib/audio_book_creator/editor.rb +2 -3
- data/lib/audio_book_creator/page_db.rb +14 -8
- data/lib/audio_book_creator/page_def.rb +7 -15
- data/lib/audio_book_creator/runner.rb +5 -3
- data/lib/audio_book_creator/speaker.rb +1 -1
- data/lib/audio_book_creator/spider.rb +9 -28
- data/lib/audio_book_creator/surfer_def.rb +1 -5
- data/lib/audio_book_creator/url_filter.rb +1 -1
- data/lib/audio_book_creator/version.rb +1 -1
- data/lib/audio_book_creator/web_page.rb +49 -0
- data/run_mutant +89 -0
- data/spec/audio_book_creator/binder_spec.rb +3 -3
- data/spec/audio_book_creator/book_creator_spec.rb +2 -3
- data/spec/audio_book_creator/book_def_spec.rb +33 -22
- data/spec/audio_book_creator/cached_hash_spec.rb +4 -0
- data/spec/audio_book_creator/cli_spec.rb +189 -122
- data/spec/audio_book_creator/conductor_spec.rb +17 -6
- data/spec/audio_book_creator/defaulter_spec.rb +154 -0
- data/spec/audio_book_creator/editor_spec.rb +7 -7
- data/spec/audio_book_creator/page_db_spec.rb +73 -11
- data/spec/audio_book_creator/page_def_spec.rb +26 -40
- data/spec/audio_book_creator/speaker_spec.rb +2 -2
- data/spec/audio_book_creator/spider_spec.rb +10 -15
- data/spec/audio_book_creator/surfer_def_spec.rb +1 -4
- data/spec/audio_book_creator/url_filter_spec.rb +1 -1
- data/spec/audio_book_creator/web_page_spec.rb +65 -0
- data/spec/audio_book_creator_spec.rb +23 -0
- data/spec/spec_helper.rb +15 -12
- metadata +14 -20
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe AudioBookCreator::Defaulter do
|
4
|
+
# sample to use for settings
|
5
|
+
let(:all_settings) do
|
6
|
+
{
|
7
|
+
"www.host.com" => {
|
8
|
+
:title_path => "h1.host",
|
9
|
+
:body_path => "div.host",
|
10
|
+
:link_path => "a.host",
|
11
|
+
:chapter_path => "a.chapter.host",
|
12
|
+
}
|
13
|
+
}
|
14
|
+
end
|
15
|
+
|
16
|
+
let(:settings) { {} }
|
17
|
+
let(:page_def) { AudioBookCreator::PageDef.new("h1.title", "div", "a.link", "a.chapter") }
|
18
|
+
let(:book_def) do
|
19
|
+
AudioBookCreator::BookDef.new("the title", "author", "dir", %w(a b), true).tap do |bd|
|
20
|
+
bd.urls = %w(http://www.host.com/)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
subject { described_class.new(page_def, book_def).tap { |d| d.settings = settings } }
|
24
|
+
|
25
|
+
describe "#initialize" do
|
26
|
+
it { expect(subject.page_def).to eq(page_def) }
|
27
|
+
it { expect(subject.book_def).to eq(book_def) }
|
28
|
+
end
|
29
|
+
|
30
|
+
describe "#host" do
|
31
|
+
it "supports empty url" do
|
32
|
+
book_def.urls = %w()
|
33
|
+
expect(subject.host).to be_nil
|
34
|
+
end
|
35
|
+
|
36
|
+
it { expect(subject.host).to eq("www.host.com") }
|
37
|
+
end
|
38
|
+
|
39
|
+
describe "#settings" do
|
40
|
+
# clear out settings so Defaulter can actually set it up correctly
|
41
|
+
before { subject.settings = nil }
|
42
|
+
|
43
|
+
it "sets filename" do
|
44
|
+
expect(subject.settings.filename).to eq("settings.db")
|
45
|
+
end
|
46
|
+
|
47
|
+
it "sets table_name" do
|
48
|
+
expect(subject.settings.table_name).to eq("settings")
|
49
|
+
end
|
50
|
+
|
51
|
+
it "sets table_name" do
|
52
|
+
expect(subject.settings.encode).to eq(true)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# settings => page_defs
|
57
|
+
describe "#load_unset_values" do
|
58
|
+
it "skips on nil" do
|
59
|
+
subject.settings = 1 # ensure this is not accessed (because host is empty)
|
60
|
+
book_def.urls = []
|
61
|
+
subject.load_unset_values
|
62
|
+
expect_page_def("h1.title", "div", "a.link", "a.chapter")
|
63
|
+
end
|
64
|
+
|
65
|
+
it "skips unknown hosts" do
|
66
|
+
subject.settings = {"host2.com" => {:title_path => "h1.host2"}}
|
67
|
+
subject.load_unset_values
|
68
|
+
expect_page_def("h1.title", "div", "a.link", "a.chapter")
|
69
|
+
end
|
70
|
+
|
71
|
+
it "uses a) hostname to b) set partial values" do
|
72
|
+
subject.settings = {"www.host.com" => {:title_path => "h1.host"}}
|
73
|
+
book_def.urls = %w(http://www.host.com/abc)
|
74
|
+
subject.load_unset_values
|
75
|
+
expect_page_def("h1.host", "div", "a.link", "a.chapter")
|
76
|
+
end
|
77
|
+
|
78
|
+
it "sets all values" do
|
79
|
+
subject.settings = all_settings
|
80
|
+
subject.load_unset_values
|
81
|
+
expect_page_def("h1.host", "div.host", "a.host", "a.chapter.host")
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# page_defs => settings
|
86
|
+
describe "#store" do
|
87
|
+
let(:settings) { all_settings }
|
88
|
+
let(:page_def) { AudioBookCreator::PageDef.new(nil, nil, nil, nil) }
|
89
|
+
|
90
|
+
it "skips on empty url" do
|
91
|
+
subject.settings = 1 # ensure this is not accessed (because host is empty)
|
92
|
+
book_def.urls = []
|
93
|
+
expect { subject.store }.not_to raise_error
|
94
|
+
end
|
95
|
+
|
96
|
+
context "with unknown host" do
|
97
|
+
let(:settings) { {} }
|
98
|
+
|
99
|
+
it "adds settings" do
|
100
|
+
# no settings for this host
|
101
|
+
page_def.title_path = "h1.changed"
|
102
|
+
subject.store
|
103
|
+
expect_settings("h1.changed")
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
it "updates only overridden values" do
|
108
|
+
page_def.title_path = "h1.changed"
|
109
|
+
subject.store
|
110
|
+
expect_settings("h1.changed", "div.host", "a.host", "a.chapter.host")
|
111
|
+
end
|
112
|
+
|
113
|
+
it "sets all values" do
|
114
|
+
page_def.title_path = "h1.changed"
|
115
|
+
page_def.body_path = "div.changed"
|
116
|
+
page_def.link_path = "a.changed"
|
117
|
+
page_def.chapter_path = "a.chapter.changed"
|
118
|
+
subject.store
|
119
|
+
expect_settings("h1.changed", "div.changed", "a.changed", "a.chapter.changed")
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def expect_page_def(title_path, body_path, link_path, chapter_path)
|
124
|
+
expect(page_def.title_path).to eq(title_path)
|
125
|
+
expect(page_def.body_path).to eq(body_path)
|
126
|
+
expect(page_def.link_path).to eq(link_path)
|
127
|
+
expect(page_def.chapter_path).to eq(chapter_path)
|
128
|
+
end
|
129
|
+
|
130
|
+
def expect_settings(title_path = nil, body_path = nil, link_path = nil, chapter_path = nil)
|
131
|
+
value = settings[subject.host]
|
132
|
+
expect(value).not_to be_nil
|
133
|
+
if title_path
|
134
|
+
expect(value[:title_path]).to eq(title_path)
|
135
|
+
else
|
136
|
+
expect(value).not_to have_key(:title_path)
|
137
|
+
end
|
138
|
+
if body_path
|
139
|
+
expect(value[:body_path]).to eq(body_path)
|
140
|
+
else
|
141
|
+
expect(value).not_to have_key(:body_path)
|
142
|
+
end
|
143
|
+
if link_path
|
144
|
+
expect(value[:link_path]).to eq(link_path)
|
145
|
+
else
|
146
|
+
expect(value).not_to have_key(:link_path)
|
147
|
+
end
|
148
|
+
if chapter_path
|
149
|
+
expect(value[:chapter_path]).to eq(chapter_path)
|
150
|
+
else
|
151
|
+
expect(value).not_to have_key(:chapter_path)
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
@@ -5,7 +5,7 @@ describe AudioBookCreator::Editor do
|
|
5
5
|
subject { described_class.new(page_def) }
|
6
6
|
let(:chapter1) { chapter("first\n\nsecond", "the title") }
|
7
7
|
it "should generate a page" do
|
8
|
-
expect(subject.parse([
|
8
|
+
expect(subject.parse([web_page(uri("page1"), "page1", "<h1>the title</h1>",
|
9
9
|
"<p>first</p>", "<p>second</p>")
|
10
10
|
])).to eq([chapter1])
|
11
11
|
end
|
@@ -13,27 +13,27 @@ describe AudioBookCreator::Editor do
|
|
13
13
|
it "should respect content path" do
|
14
14
|
page_def.title_path = "h3"
|
15
15
|
page_def.body_path = "#story p"
|
16
|
-
expect(subject.parse([
|
16
|
+
expect(subject.parse([web_page(uri("page1"), "page1", "<h3>the title</h3>",
|
17
17
|
"<div id='story'>", "<p>first</p>", "<p>second</p>", "</div>",
|
18
18
|
"<p>bad</p>")
|
19
19
|
])).to eq([chapter1])
|
20
20
|
end
|
21
21
|
|
22
22
|
it "should ignore body formatting" do
|
23
|
-
expect(subject.parse([
|
23
|
+
expect(subject.parse([web_page(uri("page1"), "page1", "<h1>the title</h1>",
|
24
24
|
"<p><a href='#this'>first</a></p>", "<p><b>second</b></p>")
|
25
25
|
])).to eq([chapter1])
|
26
26
|
end
|
27
27
|
|
28
28
|
it "should parse multiple pages" do
|
29
|
-
expect(subject.parse([
|
30
|
-
|
29
|
+
expect(subject.parse([web_page(uri("page1"), "page1", "<h1>p1</h1>", "<p>first</p>"),
|
30
|
+
web_page(uri("page2"), "page2", "<h1>p2</h1>", "<p>second</p>"),
|
31
31
|
])).to eq([chapter("first", "p1", 1), chapter("second", "p2", 2)])
|
32
32
|
end
|
33
33
|
|
34
34
|
it "should default the title if none found" do
|
35
|
-
expect(subject.parse([
|
36
|
-
|
35
|
+
expect(subject.parse([web_page(uri("page1"), "page1", "<p>first</p>"),
|
36
|
+
web_page(uri("page2"), "page2", "<p>second</p>"),
|
37
37
|
])).to eq([chapter("first", "Chapter 1", 1), chapter("second", "Chapter 2", 2)])
|
38
38
|
end
|
39
39
|
end
|
@@ -2,11 +2,12 @@ require "spec_helper"
|
|
2
2
|
require 'tempfile'
|
3
3
|
|
4
4
|
describe AudioBookCreator::PageDb do
|
5
|
-
subject {
|
5
|
+
subject { standard_db }
|
6
6
|
|
7
7
|
# all of these tests are in memory
|
8
8
|
# this is the only test that depends upon it
|
9
9
|
context "with memory databases" do
|
10
|
+
subject { standard_db(":memory:")}
|
10
11
|
it "does not create a file" do
|
11
12
|
# access key to trigger database creation
|
12
13
|
subject["key"]
|
@@ -14,30 +15,84 @@ describe AudioBookCreator::PageDb do
|
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
18
|
+
describe "#initialize" do
|
19
|
+
subject { described_class.new(":memory:", "tablename", true) }
|
20
|
+
it { expect(subject.filename).to eq(":memory:") }
|
21
|
+
it { expect(subject.table_name).to eq("tablename") }
|
22
|
+
it { expect(subject.encode).to eq(true) }
|
23
|
+
end
|
24
|
+
|
17
25
|
it "works" do
|
18
26
|
expect(subject).not_to be_nil
|
19
27
|
end
|
20
28
|
|
21
|
-
|
22
|
-
|
29
|
+
describe "#[]" do
|
30
|
+
it "finds value" do
|
31
|
+
subject["key"] = "value"
|
32
|
+
|
33
|
+
expect(subject["key"]).to eq("value")
|
34
|
+
end
|
35
|
+
|
36
|
+
it "finds nothing" do
|
37
|
+
expect(subject["key"]).to be_nil
|
38
|
+
end
|
39
|
+
|
40
|
+
context "with encoding db" do
|
41
|
+
subject { encoded_db }
|
42
|
+
it "find hashes" do
|
43
|
+
subject["key"] = {:name => "value"}
|
23
44
|
|
24
|
-
|
45
|
+
expect(subject["key"]).to eq({:name => "value"})
|
46
|
+
end
|
47
|
+
end
|
25
48
|
end
|
26
49
|
|
27
|
-
|
28
|
-
|
29
|
-
|
50
|
+
describe "#[]=" do
|
51
|
+
it "sets nils" do
|
52
|
+
subject["key"] = nil
|
53
|
+
|
54
|
+
expect(subject["key"]).to eq(nil)
|
55
|
+
end
|
56
|
+
|
57
|
+
it "sets value" do
|
58
|
+
subject["key"] = "value"
|
59
|
+
|
60
|
+
expect(subject["key"]).to eq("value")
|
61
|
+
end
|
62
|
+
|
63
|
+
context "with encoding db" do
|
64
|
+
subject { encoded_db }
|
65
|
+
|
66
|
+
it "sets nils" do
|
67
|
+
subject["key"] = nil
|
68
|
+
|
69
|
+
expect(subject["key"]).to eq(nil)
|
70
|
+
end
|
71
|
+
|
72
|
+
it "sets hashes" do
|
73
|
+
subject["key"] = {:name => "value"}
|
74
|
+
|
75
|
+
expect(subject["key"]).to eq({:name => "value"})
|
76
|
+
end
|
77
|
+
end
|
30
78
|
end
|
31
79
|
|
32
|
-
|
33
|
-
|
80
|
+
describe "#include?" do
|
81
|
+
it "include good key" do
|
82
|
+
subject["key"] = "value"
|
83
|
+
expect(subject).to include("key")
|
84
|
+
end
|
85
|
+
|
86
|
+
it "doesnt include bad key" do
|
87
|
+
expect(subject).not_to include("key")
|
88
|
+
end
|
34
89
|
end
|
35
90
|
|
36
91
|
context "with prepopulated (file) database" do
|
37
92
|
let(:tmp) { Tempfile.new("db") }
|
38
93
|
|
39
94
|
before do
|
40
|
-
db =
|
95
|
+
db = standard_db(tmp.path)
|
41
96
|
db["key"] = "value"
|
42
97
|
end
|
43
98
|
|
@@ -47,7 +102,7 @@ describe AudioBookCreator::PageDb do
|
|
47
102
|
end
|
48
103
|
|
49
104
|
it "finds entry in previously created cache" do
|
50
|
-
db =
|
105
|
+
db = standard_db(tmp.path)
|
51
106
|
expect(db["key"]).to eq("value")
|
52
107
|
end
|
53
108
|
|
@@ -71,4 +126,11 @@ describe AudioBookCreator::PageDb do
|
|
71
126
|
expect(subject.map { |(n, v)| "#{n}:#{v}" }).to eq(%w(keyc:v keya:v keyz:v))
|
72
127
|
end
|
73
128
|
|
129
|
+
def standard_db(filename = ":memory:")
|
130
|
+
described_class.new(filename, "pages", false)
|
131
|
+
end
|
132
|
+
|
133
|
+
def encoded_db(filename = ":memory:")
|
134
|
+
described_class.new(filename, "settings", true)
|
135
|
+
end
|
74
136
|
end
|
@@ -10,69 +10,55 @@ describe AudioBookCreator::PageDef do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
context "with all parameters" do
|
13
|
-
subject { described_class.new("h1.title", "div", "a.link", "a.chapter") }
|
13
|
+
subject { described_class.new("h1.title", "div", "a.link", "a.chapter", {:url => true}) }
|
14
14
|
it { expect(subject.title_path).to eq("h1.title") }
|
15
15
|
it { expect(subject.body_path).to eq("div") }
|
16
16
|
it { expect(subject.link_path).to eq("a.link") }
|
17
17
|
it { expect(subject.chapter_path).to eq("a.chapter") }
|
18
|
-
|
19
|
-
|
20
|
-
describe "#title" do
|
21
|
-
context "with no title" do
|
22
|
-
let(:page) { dom("<p></p>")}
|
23
|
-
it { expect(subject.title(page)).to be_nil}
|
24
|
-
end
|
25
|
-
context "with title" do
|
26
|
-
let(:page) { dom("<h1>title</h1>")}
|
27
|
-
it { expect(subject.title(page)).to eq("title")}
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
# NOTE: chapter uses array.join
|
32
|
-
describe "#body" do
|
33
|
-
context "with no body" do
|
34
|
-
let(:page) { dom("<h1></h1>")}
|
35
|
-
it { expect(subject.body(page)).to be_empty}
|
36
|
-
end
|
37
|
-
context "with body" do
|
38
|
-
let(:page) { dom("<p>p1</p>")}
|
39
|
-
it { expect(Array(subject.body(page)).join).to eq("p1") }
|
40
|
-
end
|
41
|
-
context "with many bodies" do
|
42
|
-
let(:page) { dom("<p>p1</p><p>p2</p><p>p3</p><p>p4</p>")}
|
43
|
-
it { expect(Array(subject.body(page)).join).to eq(%w(p1 p2 p3 p4).join) }
|
44
|
-
end
|
18
|
+
it { expect(subject.invalid_urls).to eq({:url => true}) }
|
45
19
|
end
|
46
20
|
|
47
21
|
describe "#page_links" do
|
22
|
+
let(:root) { uri("") }
|
48
23
|
context "with no page_links" do
|
49
|
-
let(:
|
50
|
-
it { expect(subject.page_links(
|
24
|
+
let(:wp) { web_page(root, "title","<p></p>")}
|
25
|
+
it { expect(subject.page_links(wp)).to be_empty}
|
51
26
|
end
|
52
27
|
context "with multiple page_links" do
|
53
|
-
let(:
|
54
|
-
it { expect(subject.page_links(
|
28
|
+
let(:wp) { web_page(root, "title", "<a href='tgt1'>a</a><a href='tgt2'>a</a>")}
|
29
|
+
it { expect(subject.page_links(wp)).to eq(uri(%w(tgt1 tgt2))) }
|
30
|
+
end
|
31
|
+
context "with bad page_links" do
|
32
|
+
before { subject.invalid_urls = {uri("bad") => false}}
|
33
|
+
let(:wp) { web_page(root, "title", "<a href='tgt1'>a</a><a href='bad'>a</a>")}
|
34
|
+
it { expect(subject.page_links(wp)).to eq(uri(%w(tgt1))) }
|
55
35
|
end
|
56
36
|
end
|
57
37
|
|
58
38
|
describe "#chapter_links" do
|
39
|
+
let(:root) { uri("") }
|
59
40
|
before { subject.chapter_path = "a.chapter"}
|
60
41
|
context "with no chapter_links" do
|
61
|
-
let(:
|
62
|
-
it { expect(subject.chapter_links(
|
42
|
+
let(:wp) { web_page(root, "title","<p></p>")}
|
43
|
+
it { expect(subject.chapter_links(wp)).to be_empty }
|
63
44
|
end
|
64
45
|
context "with only page_links" do
|
65
|
-
let(:
|
66
|
-
it { expect(subject.chapter_links(
|
46
|
+
let(:wp) { web_page(root, "title", "<p><a href='x'>x</a></p>")}
|
47
|
+
it { expect(subject.chapter_links(wp)).to be_empty }
|
67
48
|
end
|
68
49
|
context "with multiple chapter_links" do
|
69
|
-
let(:
|
70
|
-
it { expect(subject.chapter_links(
|
50
|
+
let(:wp) { web_page(root, "title", "<a class='chapter' href='tgt1'>a</a><a class='chapter' href='tgt2'>a</a>") }
|
51
|
+
it { expect(subject.chapter_links(wp)).to eq(uri(%w(tgt1 tgt2))) }
|
71
52
|
end
|
72
53
|
context "with nil chapter_path" do
|
73
54
|
before { subject.chapter_path = nil }
|
74
|
-
let(:
|
75
|
-
it { expect(subject.chapter_links(
|
55
|
+
let(:wp) { web_page(root, "title", "<a class='chapter' href='tgt1'>a</a><a class='chapter' href='tgt2'>a</a>") }
|
56
|
+
it { expect(subject.chapter_links(wp)).to be_empty }
|
57
|
+
end
|
58
|
+
context "with bad chapter_links" do
|
59
|
+
before { subject.invalid_urls = {uri("bad") => false}}
|
60
|
+
let(:wp) { web_page(root, "title", "<a class='chapter' href='tgt1'>a</a><a class='chapter' href='bad'>a</a>")}
|
61
|
+
it { expect(subject.chapter_links(wp)).to eq(uri(%w(tgt1))) }
|
76
62
|
end
|
77
63
|
end
|
78
64
|
end
|
@@ -6,7 +6,7 @@ describe AudioBookCreator::Speaker do
|
|
6
6
|
subject { described_class.new(speaker_def, book_def) }
|
7
7
|
it "should require a non empty chapter" do
|
8
8
|
expect_runner.not_to receive(:system)
|
9
|
-
expect { subject.say(chapter(nil)) }.to raise_error
|
9
|
+
expect { subject.say(chapter(nil)) }.to raise_error("Empty Chapter")
|
10
10
|
end
|
11
11
|
|
12
12
|
it "should do nothing if txt and mp4 file exist" do
|
@@ -61,7 +61,7 @@ describe AudioBookCreator::Speaker do
|
|
61
61
|
|
62
62
|
it "should freak if no chapters are passed in" do
|
63
63
|
expect_runner.not_to receive(:system)
|
64
|
-
expect { subject.say([]) }.to raise_error("Empty
|
64
|
+
expect { subject.say([]) }.to raise_error("Empty Chapter")
|
65
65
|
end
|
66
66
|
|
67
67
|
context "#make_directory_structure" do
|
@@ -2,47 +2,40 @@ require "spec_helper"
|
|
2
2
|
|
3
3
|
describe AudioBookCreator::Spider do
|
4
4
|
# set a max to prevent errors from causing infinite loops
|
5
|
-
let(:page_def) { AudioBookCreator::PageDef.new("h1", "p", "a.page", "a.chapter") }
|
5
|
+
let(:page_def) { AudioBookCreator::PageDef.new("h1", "p", "a.page", "a.chapter", invalid_urls) }
|
6
6
|
let(:web) { {} }
|
7
7
|
let(:invalid_urls) { {} }
|
8
8
|
# NOTE: could use arrays here, but put caps to catch bugs
|
9
|
-
subject { described_class.new(page_def, web
|
10
|
-
|
11
|
-
it "handles empty initializer" do
|
12
|
-
pristine = described_class.new(page_def)
|
13
|
-
expect(pristine.web).to be_a(Hash)
|
14
|
-
expect(pristine.invalid_urls).to be_a(Hash)
|
15
|
-
end
|
9
|
+
subject { described_class.new(page_def, web) }
|
16
10
|
|
17
11
|
it "sets arguments" do
|
18
12
|
expect(subject.page_def).to eq(page_def)
|
19
13
|
expect(subject.web).to eq(web)
|
20
|
-
expect(subject.invalid_urls).not_to be_nil
|
21
14
|
end
|
22
15
|
|
23
16
|
context "#visit" do
|
24
17
|
it "visit urls" do
|
25
18
|
expect_visit_page "page1", "x"
|
26
|
-
expect(subject.run(uri(%w(page1)))).to eq([
|
19
|
+
expect(subject.run(uri(%w(page1)))).to eq([web_page(uri("page1"), site("page1"), "x")])
|
27
20
|
end
|
28
21
|
|
29
22
|
it "visit string" do
|
30
23
|
expect_visit_page "page1", "x"
|
31
|
-
expect(subject.run(site(%w(page1)))).to eq([
|
24
|
+
expect(subject.run(site(%w(page1)))).to eq([web_page(uri("page1"), site("page1"), "x")])
|
32
25
|
end
|
33
26
|
|
34
27
|
it "visit multiple pages" do
|
35
28
|
expect_visit_page "page1"
|
36
29
|
expect_visit_page "page2"
|
37
30
|
expect(subject.run(uri(%w(page1 page2))))
|
38
|
-
.to eq([
|
31
|
+
.to eq([web_page(uri("page1"), site("page1")), web_page(uri("page2"), site("page2"))])
|
39
32
|
end
|
40
33
|
|
41
34
|
it "visit unique list of pages" do
|
42
35
|
expect_visit_page "page1", link("page2"), link("page2")
|
43
36
|
expect_visit_page "page2"
|
44
37
|
expect(subject.run uri(%w(page1)))
|
45
|
-
.to eq([
|
38
|
+
.to eq([web_page(uri("page1"), site("page1"),link("page2"), link("page2")), web_page(uri("page2"), site("page2"))])
|
46
39
|
end
|
47
40
|
|
48
41
|
it "skips loops from uri" do
|
@@ -88,7 +81,9 @@ describe AudioBookCreator::Spider do
|
|
88
81
|
p1_contents = "<a id='a1'>a1</a>", "<a href=''>a2</a>", "<a href='#a'>x</a>", link("page2")
|
89
82
|
expect_visit_page("page1", *p1_contents)
|
90
83
|
expect_visit_page("page2")
|
91
|
-
expect(subject.run uri(%w(page1))).to eq([
|
84
|
+
expect(subject.run uri(%w(page1))).to eq([
|
85
|
+
web_page(uri("page1"), site("page1"), *p1_contents),
|
86
|
+
web_page(uri("page2"), site("page2"))])
|
92
87
|
end
|
93
88
|
|
94
89
|
it "visits all pages once (and only once)" do
|
@@ -142,7 +137,7 @@ describe AudioBookCreator::Spider do
|
|
142
137
|
|
143
138
|
context "with invalid_urls" do
|
144
139
|
it "skips invalid_urls" do
|
145
|
-
expect(
|
140
|
+
expect(invalid_urls).to receive(:include?).with(uri("bad")).and_return(true)
|
146
141
|
expect_visit_page("page1", link("bad"))
|
147
142
|
subject.run uri(%w(page1))
|
148
143
|
end
|