repub 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +25 -9
- data/README.rdoc +46 -40
- data/Rakefile +1 -0
- data/bin/repub +1 -1
- data/lib/repub.rb +1 -1
- data/lib/repub/app.rb +3 -3
- data/lib/repub/app/builder.rb +84 -36
- data/lib/repub/app/fetcher.rb +13 -11
- data/lib/repub/app/options.rb +36 -5
- data/lib/repub/app/parser.rb +1 -1
- data/lib/repub/app/profile.rb +16 -15
- data/lib/repub/epub/container.rb +28 -28
- data/lib/repub/epub/content.rb +59 -34
- data/lib/repub/epub/toc.rb +139 -139
- data/repub.gemspec +3 -3
- data/test/data/custom.css +3 -0
- data/test/data/invisiblellama.png +0 -0
- data/test/data/test.css +5 -0
- data/test/data/test.html +60 -0
- data/test/epub/test_container.rb +4 -4
- data/test/epub/test_content.rb +42 -38
- data/test/epub/test_toc.rb +19 -7
- data/test/test_builder.rb +145 -1
- data/test/test_fetcher.rb +79 -20
- data/test/test_parser.rb +45 -32
- metadata +6 -2
data/repub.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{repub}
|
5
|
-
s.version = "0.3.2"
|
5
|
+
s.version = "0.3.3"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Dmitri Goutnik"]
|
9
|
-
s.date = %q{2009-
|
9
|
+
s.date = %q{2009-07-05}
|
10
10
|
s.default_executable = %q{repub}
|
11
11
|
s.description = %q{Repub is a simple HTML to ePub converter.
|
12
12
|
|
@@ -16,7 +16,7 @@ ePub documents.}
|
|
16
16
|
s.email = %q{dg@invisiblellama.net}
|
17
17
|
s.executables = ["repub"]
|
18
18
|
s.extra_rdoc_files = ["History.txt", "README.rdoc", "bin/repub"]
|
19
|
-
s.files = ["History.txt", "README.rdoc", "Rakefile", "TODO", "bin/repub", "lib/repub.rb", "lib/repub/app.rb", "lib/repub/app/builder.rb", "lib/repub/app/fetcher.rb", "lib/repub/app/logger.rb", "lib/repub/app/options.rb", "lib/repub/app/parser.rb", "lib/repub/app/profile.rb", "lib/repub/app/utility.rb", "lib/repub/epub.rb", "lib/repub/epub/container.rb", "lib/repub/epub/content.rb", "lib/repub/epub/toc.rb", "repub.gemspec", "test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
|
19
|
+
s.files = ["History.txt", "README.rdoc", "Rakefile", "TODO", "bin/repub", "lib/repub.rb", "lib/repub/app.rb", "lib/repub/app/builder.rb", "lib/repub/app/fetcher.rb", "lib/repub/app/logger.rb", "lib/repub/app/options.rb", "lib/repub/app/parser.rb", "lib/repub/app/profile.rb", "lib/repub/app/utility.rb", "lib/repub/epub.rb", "lib/repub/epub/container.rb", "lib/repub/epub/content.rb", "lib/repub/epub/toc.rb", "repub.gemspec", "test/data/custom.css", "test/data/invisiblellama.png", "test/data/test.css", "test/data/test.html", "test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
|
20
20
|
s.homepage = %q{http://rubyforge.org/projects/repub/}
|
21
21
|
s.rdoc_options = ["--main", "README.rdoc"]
|
22
22
|
s.require_paths = ["lib"]
|
Binary file
|
data/test/data/test.css
ADDED
data/test/data/test.html
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>Test Page</title>
|
4
|
+
<link rel='stylesheet' type='text/css' href='test.css'/>
|
5
|
+
<style type='text/css'>
|
6
|
+
h1 {
|
7
|
+
font-size: 4em;
|
8
|
+
}
|
9
|
+
div.img {
|
10
|
+
text-align: right;
|
11
|
+
background-color: #000;
|
12
|
+
padding: 10px;
|
13
|
+
}
|
14
|
+
div.img img {
|
15
|
+
border: none;
|
16
|
+
}
|
17
|
+
</style>
|
18
|
+
</head>
|
19
|
+
|
20
|
+
<body>
|
21
|
+
<div class='img'>
|
22
|
+
<img src='invisiblellama.png' alt='invisible llama'/>
|
23
|
+
</div>
|
24
|
+
|
25
|
+
<h1>Lorem Ipsum</h1>
|
26
|
+
|
27
|
+
<ul>
|
28
|
+
<li>
|
29
|
+
<a href='#c1'>Chapter 1</a>
|
30
|
+
<ul>
|
31
|
+
<li><a href='#c11'>Chapter 1.1</a></li>
|
32
|
+
<li><a href='#c12'>Chapter 1.2</a></li>
|
33
|
+
</ul>
|
34
|
+
</li>
|
35
|
+
<li>
|
36
|
+
<a href='#c2'>Chapter 2</a>
|
37
|
+
<ul>
|
38
|
+
<li><a href='#c21'>Chapter 2.1</a></li>
|
39
|
+
</ul>
|
40
|
+
</li>
|
41
|
+
<li>
|
42
|
+
<a href='#c3'>Chapter 3</a>
|
43
|
+
</li>
|
44
|
+
</ul>
|
45
|
+
|
46
|
+
<h1><a id='c1'/>Chapter 1</h1>
|
47
|
+
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
48
|
+
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
49
|
+
<h3><a id='c11'/>Chapter 1.1</h3>
|
50
|
+
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
51
|
+
<h3><a id='c12'/>Chapter 1.2</h3>
|
52
|
+
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
53
|
+
<h1><a id='c2'/>Chapter 2</h1>
|
54
|
+
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
55
|
+
<h3><a id='c21'/>Chapter 2.1</h3>
|
56
|
+
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
57
|
+
<h1><a id='c3'/>Chapter 3</h1>
|
58
|
+
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
59
|
+
</body>
|
60
|
+
</html>
|
data/test/epub/test_container.rb
CHANGED
@@ -7,9 +7,9 @@ class TestContainer < Test::Unit::TestCase
|
|
7
7
|
def test_container_create
|
8
8
|
c = Repub::Epub::Container.new
|
9
9
|
s = c.to_xml
|
10
|
-
doc = Nokogiri::
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
doc = Nokogiri::XML.parse(s)
|
11
|
+
assert_not_nil(doc.at('rootfile'))
|
12
|
+
assert_equal('content.opf', doc.at('rootfile')['full-path'])
|
13
|
+
assert_equal('application/oebps-package+xml', doc.at('rootfile')['media-type'])
|
14
14
|
end
|
15
15
|
end
|
data/test/epub/test_content.rb
CHANGED
@@ -4,53 +4,57 @@ require 'nokogiri'
|
|
4
4
|
require 'repub/epub'
|
5
5
|
|
6
6
|
class TestContent < Test::Unit::TestCase
|
7
|
+
def test_create
|
8
|
+
x = Repub::Epub::Content.new('some-name')
|
9
|
+
s = x.to_xml
|
10
|
+
doc = Nokogiri::XML.parse(s)
|
11
|
+
#p doc
|
12
|
+
|
13
|
+
metadata = doc.at('metadata')
|
14
|
+
assert_not_nil(metadata)
|
15
|
+
assert_equal('some-name', metadata.xpath('dc:identifier', 'xmlns:dc' => "http://purl.org/dc/elements/1.1/").inner_text)
|
16
|
+
assert_equal('Untitled', metadata.xpath('dc:title', 'xmlns:dc' => "http://purl.org/dc/elements/1.1/").inner_text)
|
17
|
+
assert_equal('en', metadata.xpath('dc:language', 'xmlns:dc' => "http://purl.org/dc/elements/1.1/").inner_text)
|
18
|
+
assert_equal(Date.today.to_s, metadata.xpath('dc:date', 'xmlns:dc' => "http://purl.org/dc/elements/1.1/").inner_text)
|
19
|
+
end
|
20
|
+
|
7
21
|
def test_manifest_create
|
8
22
|
x = Repub::Epub::Content.new('some-name')
|
9
23
|
s = x.to_xml
|
10
|
-
|
11
|
-
doc
|
24
|
+
doc = Nokogiri::XML.parse(s)
|
25
|
+
#p doc
|
12
26
|
|
13
|
-
|
14
|
-
assert_not_nil(
|
15
|
-
|
16
|
-
assert_equal(
|
17
|
-
|
18
|
-
assert_equal(
|
19
|
-
# spine was created
|
20
|
-
assert_not_nil(doc.search('spine'))
|
21
|
-
# and is empty
|
22
|
-
assert_equal(0, doc.search('spine/item').size)
|
27
|
+
manifest = doc.at('manifest')
|
28
|
+
assert_not_nil(manifest)
|
29
|
+
assert_equal(1, manifest.children.size)
|
30
|
+
assert_equal('ncx', manifest.at('item')['id'])
|
31
|
+
assert_not_nil(doc.at('spine'))
|
32
|
+
assert_equal(0, doc.xpath('spine/item').size)
|
23
33
|
end
|
24
34
|
|
25
|
-
def
|
35
|
+
def test_manifest_items
|
26
36
|
x = Repub::Epub::Content.new('some-name')
|
27
|
-
x.
|
28
|
-
x.
|
29
|
-
x.
|
30
|
-
x.
|
31
|
-
x.
|
32
|
-
x.
|
33
|
-
x.
|
34
|
-
x.
|
35
|
-
x.add_document 'glossary.html', 'glossary'
|
37
|
+
x.add_item 'style.css'
|
38
|
+
x.add_item 'more-style.css'
|
39
|
+
x.add_item ' logo.jpg '
|
40
|
+
x.add_item ' image.png'
|
41
|
+
x.add_item 'picture.jpeg '
|
42
|
+
x.add_item 'intro.html', 'intro'
|
43
|
+
x.add_item 'chapter-1.html'
|
44
|
+
x.add_item 'glossary.html', 'glossary'
|
36
45
|
s = x.to_xml
|
37
|
-
#puts s
|
38
46
|
doc = Nokogiri::HTML(s)
|
47
|
+
#p doc
|
39
48
|
|
40
|
-
|
41
|
-
assert_not_nil(
|
42
|
-
|
43
|
-
assert_equal(2,
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
assert_equal(
|
48
|
-
|
49
|
-
|
50
|
-
# and has 3 html items
|
51
|
-
assert_equal(3, doc.search('spine/itemref').size)
|
52
|
-
# check that order is as inserted and ids are correct
|
53
|
-
assert_equal('intro', doc.search('spine/itemref')[0]['idref'])
|
54
|
-
assert_equal('glossary', doc.search('spine/itemref')[2]['idref'])
|
49
|
+
manifest = doc.at('manifest')
|
50
|
+
assert_not_nil(manifest)
|
51
|
+
assert_equal(2, manifest.xpath('item[@media-type="text/css"]').size)
|
52
|
+
assert_equal(2, manifest.search('item[@media-type="image/jpeg"]').size)
|
53
|
+
assert_equal(1, manifest.search('item[@media-type="image/png"]').size)
|
54
|
+
|
55
|
+
spine = doc.at('spine')
|
56
|
+
assert_equal(3, spine.search('itemref').size)
|
57
|
+
assert_equal('intro', spine.at('./itemref[position()=1]')['idref'])
|
58
|
+
assert_equal('glossary', spine.at('./itemref[position()=3]')['idref'])
|
55
59
|
end
|
56
60
|
end
|
data/test/epub/test_toc.rb
CHANGED
@@ -7,12 +7,16 @@ class TestToc < Test::Unit::TestCase
|
|
7
7
|
def test_toc_create
|
8
8
|
x = Repub::Epub::Toc.new('some-name')
|
9
9
|
s = x.to_xml
|
10
|
-
|
11
|
-
doc
|
12
|
-
|
10
|
+
doc = Nokogiri::XML.parse(s)
|
11
|
+
assert_equal('some-name', doc.at("//xmlns:meta[@name='dtb:uid']")['content'])
|
12
|
+
assert_equal('1', doc.at("//xmlns:meta[@name='dtb:depth']")['content'])
|
13
|
+
assert_equal('0', doc.at("//xmlns:meta[@name='dtb:totalPageCount']")['content'])
|
14
|
+
assert_equal('0', doc.at("//xmlns:meta[@name='dtb:maxPageNumber']")['content'])
|
15
|
+
assert_equal('Untitled', doc.at("//xmlns:docTitle/xmlns:text").inner_text)
|
16
|
+
assert_not_nil(doc.at('//xmlns:navMap'))
|
13
17
|
end
|
14
18
|
|
15
|
-
def
|
19
|
+
def test_nav_map
|
16
20
|
x = Repub::Epub::Toc.new('some-name')
|
17
21
|
p0 = x.nav_map.add_nav_point('Intro', 'intro.html')
|
18
22
|
p1 = x.nav_map.add_nav_point('Chapter 1', 'chapter-1.html')
|
@@ -22,8 +26,16 @@ class TestToc < Test::Unit::TestCase
|
|
22
26
|
p11 = p1.add_nav_point('Chapter 1-1', 'chapter-1-1.html')
|
23
27
|
p12 = p1.add_nav_point('Chapter 1-2', 'chapter-1-2.html')
|
24
28
|
s = x.to_xml
|
25
|
-
|
26
|
-
doc
|
27
|
-
|
29
|
+
doc = Nokogiri::XML.parse(s)
|
30
|
+
assert_equal(4, doc.xpath('//xmlns:navMap/xmlns:navPoint').size)
|
31
|
+
assert_equal('2', doc.at("//xmlns:meta[@name='dtb:depth']")['content'])
|
32
|
+
assert_equal('1', doc.at('//xmlns:navMap/xmlns:navPoint[position()=1]')['playOrder'])
|
33
|
+
assert_equal('2', doc.at('//xmlns:navMap/xmlns:navPoint[position()=2]')['playOrder'])
|
34
|
+
assert_equal('3', doc.at('//xmlns:navMap/xmlns:navPoint[position()=2]/xmlns:navPoint[position()=1]')['playOrder'])
|
35
|
+
assert_equal('5', doc.at('//xmlns:navMap/xmlns:navPoint[position()=3]')['playOrder'])
|
36
|
+
assert_equal('navPoint-2', doc.at('//xmlns:navMap/xmlns:navPoint[position()=2]')['id'])
|
37
|
+
assert_equal('Chapter 1', doc.at('//xmlns:navMap/xmlns:navPoint[position()=2]/xmlns:navLabel/xmlns:text').inner_text)
|
38
|
+
assert_equal('chapter-1.html', doc.at('//xmlns:navMap/xmlns:navPoint[position()=2]/xmlns:content')['src'])
|
39
|
+
assert_equal(7, doc.xpath('//xmlns:navMap//xmlns:navPoint').size)
|
28
40
|
end
|
29
41
|
end
|
data/test/test_builder.rb
CHANGED
@@ -1,8 +1,152 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
require 'repub'
|
3
|
+
require 'repub/app'
|
3
4
|
|
4
5
|
class TestBuilder < Test::Unit::TestCase
|
6
|
+
include Repub::App::Fetcher
|
7
|
+
include Repub::App::Parser
|
8
|
+
include Repub::App::Builder
|
9
|
+
attr_reader :options
|
10
|
+
|
11
|
+
def setup
|
12
|
+
@url = 'file://' + File.expand_path(File.join(File.dirname(__FILE__), 'data/test.html'))
|
13
|
+
@options = {
|
14
|
+
:url => @url,
|
15
|
+
# NOTE: cannot test with wget because it doesn't support file:// schema
|
16
|
+
:helper => 'httrack',
|
17
|
+
:selectors => {
|
18
|
+
:title => '//h1',
|
19
|
+
:toc => '//ul',
|
20
|
+
:toc_item => './li',
|
21
|
+
:toc_section => './ul'
|
22
|
+
},
|
23
|
+
# do not delete temp folder
|
24
|
+
:browser => true
|
25
|
+
}
|
26
|
+
Cache.cleanup
|
27
|
+
end
|
28
|
+
|
29
|
+
def teardown
|
30
|
+
Cache.cleanup
|
31
|
+
end
|
32
|
+
|
5
33
|
def test_builder
|
6
|
-
|
34
|
+
builder = build(parse(fetch))
|
35
|
+
doc_path = builder.document_path
|
36
|
+
assert(doc_path.include?('test.html'))
|
37
|
+
doc_text = IO.read(doc_path)
|
38
|
+
# doctype was added
|
39
|
+
assert(doc_text =~ /^<!DOCTYPE/)
|
40
|
+
doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
|
41
|
+
# encoding was set to utf-8
|
42
|
+
doc.xpath('//head/meta[@http-equiv="Content-Type"]').each do |el|
|
43
|
+
assert_equal('text/html; charset=utf-8', el['content'].downcase)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_rx
|
48
|
+
@options[:rx] = ['/Chapter/Retpahc/', '/<h1>/<h2>/', '/<\/h1>/<\/h2>/', '/\s?[Ll]orem\s+//']
|
49
|
+
builder = build(parse(fetch))
|
50
|
+
doc_path = builder.document_path
|
51
|
+
doc_text = IO.read(doc_path)
|
52
|
+
assert(doc_text =~ /Retpahc/ && doc_text !~ /Chapter/)
|
53
|
+
assert(doc_text =~ /<h2>/ && doc_text !~ /<h1>/)
|
54
|
+
assert(doc_text =~ /<\/h2>/ && doc_text !~ /<\/h1>/)
|
55
|
+
assert(doc_text !~ /[Ll]orem/)
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_custom_css
|
59
|
+
@options[:css] = File.expand_path(File.join(File.dirname(__FILE__), 'data/custom.css'))
|
60
|
+
builder = build(parse(fetch))
|
61
|
+
doc_path = builder.document_path
|
62
|
+
doc_text = IO.read(doc_path)
|
63
|
+
doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
|
64
|
+
links = doc.xpath('//head/link[@rel="stylesheet"]')
|
65
|
+
# we have single link
|
66
|
+
assert_equal(1, links.size)
|
67
|
+
# referencing custom.css
|
68
|
+
assert_equal('custom.css', links[0]['href'])
|
69
|
+
head_last_child = doc.at('//head/*[last()]')
|
70
|
+
# and it is head's last child
|
71
|
+
assert_equal(links[0], head_last_child)
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_removing_styles
|
75
|
+
@options[:css] = '-'
|
76
|
+
builder = build(parse(fetch))
|
77
|
+
doc_path = builder.document_path
|
78
|
+
doc_text = IO.read(doc_path)
|
79
|
+
doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
|
80
|
+
links = doc.xpath('//head/link[@rel="stylesheet"]')
|
81
|
+
# no stylesheet links
|
82
|
+
assert_equal(0, links.size)
|
83
|
+
styles = doc.xpath('//head/style')
|
84
|
+
# no <style> elements
|
85
|
+
assert_equal(0, styles.size)
|
86
|
+
end
|
87
|
+
|
88
|
+
def next_nontext_sibling(el)
|
89
|
+
begin
|
90
|
+
el = el.next_sibling
|
91
|
+
end while el.text?
|
92
|
+
el
|
93
|
+
end
|
94
|
+
|
95
|
+
def previous_nontext_sibling(el)
|
96
|
+
begin
|
97
|
+
el = el.previous_sibling
|
98
|
+
end while el.text?
|
99
|
+
el
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_inserting_elements_after
|
103
|
+
selector1 = '//ul'
|
104
|
+
fragment1 = Nokogiri::HTML.fragment('<p>blah</p>')
|
105
|
+
selector2 = '//p[last()]'
|
106
|
+
fragment2 = Nokogiri::HTML.fragment('<span>bleh</span><div>boo</div>')
|
107
|
+
@options[:after] = [{ selector1 => fragment1.clone}, {selector2 => fragment2.clone}]
|
108
|
+
builder = build(parse(fetch))
|
109
|
+
doc_path = builder.document_path
|
110
|
+
doc_text = IO.read(doc_path)
|
111
|
+
doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
|
112
|
+
el = next_nontext_sibling(doc.at(selector1))
|
113
|
+
assert_equal(fragment1.children[0].to_s.strip, el.to_s.strip)
|
114
|
+
# first fragment node
|
115
|
+
el = next_nontext_sibling(doc.at(selector2))
|
116
|
+
assert_equal(fragment2.children[0].to_s.strip, el.to_s.strip)
|
117
|
+
# second fragment node
|
118
|
+
el = next_nontext_sibling(el)
|
119
|
+
assert_equal(fragment2.children[1].to_s.strip, el.to_s.strip)
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_inserting_elements_before
|
123
|
+
selector1 = '//a[@id="c11"]'
|
124
|
+
fragment1 = Nokogiri::HTML.fragment('<h4>blah</h4><div>boo</div>')
|
125
|
+
selector2 = '//p[position()=5]'
|
126
|
+
fragment2 = Nokogiri::HTML.fragment('<div>test</div>')
|
127
|
+
@options[:before] = [{ selector1 => fragment1.clone}, {selector2 => fragment2.clone}]
|
128
|
+
builder = build(parse(fetch))
|
129
|
+
doc_path = builder.document_path
|
130
|
+
doc_text = IO.read(doc_path)
|
131
|
+
doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
|
132
|
+
# first fragment node
|
133
|
+
el = previous_nontext_sibling(doc.at(selector1))
|
134
|
+
assert_equal(fragment1.children[1].to_s.strip, el.to_s.strip)
|
135
|
+
# second fragment node
|
136
|
+
el = previous_nontext_sibling(el)
|
137
|
+
assert_equal(fragment1.children[0].to_s.strip, el.to_s.strip)
|
138
|
+
el = previous_nontext_sibling(doc.at(selector2))
|
139
|
+
assert_equal(fragment2.children[0].to_s.strip, el.to_s.strip)
|
140
|
+
end
|
141
|
+
|
142
|
+
def test_remove_elements
|
143
|
+
@options[:remove] = ['ul', '//a[@id="c2"]', 'div[@class="img"]']
|
144
|
+
builder = build(parse(fetch))
|
145
|
+
doc_path = builder.document_path
|
146
|
+
doc_text = IO.read(doc_path)
|
147
|
+
doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
|
148
|
+
@options[:remove].each do |selector|
|
149
|
+
assert_equal(0, doc.xpath(selector).size)
|
150
|
+
end
|
7
151
|
end
|
8
152
|
end
|
data/test/test_fetcher.rb
CHANGED
@@ -7,30 +7,89 @@ class TestFetcher < Test::Unit::TestCase
|
|
7
7
|
include Repub::App::Fetcher
|
8
8
|
attr_reader :options
|
9
9
|
|
10
|
-
def
|
10
|
+
def setup
|
11
|
+
@url = 'file://' + File.expand_path(File.join(File.dirname(__FILE__), 'data/test.html'))
|
11
12
|
@options = {
|
12
|
-
:url =>
|
13
|
-
:
|
13
|
+
:url => @url,
|
14
|
+
# NOTE: cannot test with wget because it doesn't support file:// schema
|
15
|
+
:helper => 'httrack'
|
14
16
|
}
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
17
|
+
Cache.cleanup
|
18
|
+
end
|
19
|
+
|
20
|
+
def teardown
|
21
|
+
Cache.cleanup
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_cache_cleanup
|
25
|
+
Cache.cleanup
|
26
|
+
assert_equal(0, Dir.glob(Cache.root + '/**').size)
|
27
|
+
cache = fetch
|
28
|
+
assert_equal(1, Dir.glob(Cache.root + '/**').size)
|
29
|
+
assert_equal(3, Dir.glob(cache.path + '/*').size)
|
30
|
+
Cache.cleanup
|
31
|
+
assert_equal(0, Dir.glob(Cache.root + '/**').size)
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_fetcher
|
35
|
+
cache = fetch
|
36
|
+
assert_equal(@url, cache.url)
|
37
|
+
assert_equal('8b8d358cf1ada41d4fee885a47530296528dc235', cache.name)
|
38
|
+
assert(cache.path.include?('.repub/cache/8b8d358cf1ada41d4fee885a47530296528dc235'))
|
39
|
+
assert(File.exist?(File.join(cache.path, cache.assets[:documents][0])))
|
40
|
+
assert_equal(1, cache.assets[:documents].size)
|
41
|
+
assert_equal('test.html', cache.assets[:documents][0])
|
42
|
+
assert(File.exist?(File.join(cache.path, cache.assets[:stylesheets][0])))
|
43
|
+
assert_equal(1, cache.assets[:stylesheets].size)
|
44
|
+
assert_equal('test.css', cache.assets[:stylesheets][0])
|
45
|
+
assert(File.exist?(File.join(cache.path, cache.assets[:images][0])))
|
46
|
+
assert_equal(1, cache.assets[:images].size)
|
47
|
+
assert_equal('invisiblellama.png', cache.assets[:images][0])
|
23
48
|
end
|
24
49
|
|
25
50
|
def test_fetcher_fail
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
51
|
+
# empty url
|
52
|
+
@options[:url] = nil
|
53
|
+
assert_raise(Repub::App::FetcherException) do
|
54
|
+
cache = fetch
|
55
|
+
end
|
56
|
+
@options[:url] = ''
|
57
|
+
assert_raise(Repub::App::FetcherException) do
|
58
|
+
cache = fetch
|
59
|
+
end
|
60
|
+
# empty download helper
|
61
|
+
@options[:url] = 'bleh'
|
62
|
+
@options[:helper] = nil
|
63
|
+
assert_raise(Repub::App::FetcherException) do
|
64
|
+
cache = fetch
|
65
|
+
end
|
66
|
+
@options[:helper] = ''
|
67
|
+
assert_raise(Repub::App::FetcherException) do
|
68
|
+
cache = fetch
|
69
|
+
end
|
70
|
+
# unknown download helper
|
71
|
+
@options[:helper] = 'blah'
|
72
|
+
assert_raise(Repub::App::FetcherException) do
|
73
|
+
cache = fetch
|
74
|
+
end
|
75
|
+
# unresolvable url
|
76
|
+
@options[:helper] = 'wget'
|
77
|
+
assert_raise(Repub::App::FetcherException) do
|
78
|
+
cache = fetch
|
79
|
+
end
|
80
|
+
@options[:helper] = 'httrack'
|
81
|
+
assert_raise(Repub::App::FetcherException) do
|
82
|
+
cache = fetch
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_file_encoding_conversion
|
87
|
+
cache = fetch
|
88
|
+
assert_equal('test.html', cache.assets[:documents][0])
|
89
|
+
doc = cache.assets[:documents][0]
|
90
|
+
s_orig = IO.read(File.join(cache.path, doc))
|
91
|
+
encoding = UniversalDetector.chardet(s_orig)['encoding']
|
92
|
+
s_converted = Iconv.conv('utf-8', encoding, s_orig)
|
93
|
+
assert_equal(s_orig, s_converted)
|
34
94
|
end
|
35
|
-
|
36
95
|
end
|