ruby-crawler 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ module Crawler
2
+ module Validations
3
+
4
+ VALID_PROTOCOLS = ['http', 'https'].freeze
5
+
6
+ # Raises InvalidProtocolError unless the scheme of base_uri is http or https
7
+ #
8
+ def validate_protocol
9
+ raise InvalidProtocolError.new('Please specify either http or https') unless VALID_PROTOCOLS.include? base_uri.scheme
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,3 @@
1
+ module Crawler
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,46 @@
1
+ require 'test_helper'
2
+
3
+ class CrawlerTest < MiniTest::Test
4
+
5
+ def test_initialization
6
+ crawler = Crawler.new('https://www.digitalocean.com')
7
+ assert_equal 'www.digitalocean.com', crawler.base_uri.hostname
8
+ assert_equal 'https://www.digitalocean.com', crawler.base_url
9
+ assert_equal 'https', crawler.base_uri.scheme
10
+ end
11
+
12
+ def test_crawl
13
+ crawler = Crawler.new('https://www.digitalocean.com')
14
+ crawler.crawl
15
+ results = crawler.results
16
+ assert_equal expected_crawl_results, results
17
+ end
18
+
19
+ def expected_crawl_results
20
+ {
21
+ "domain" => "www.digitalocean.com",
22
+ "paths" => {
23
+ "/company/careers/" => {
24
+ "asset_dependencies" => [],
25
+ "links_to" => [],
26
+ "linked_to_from" => ["/features/", "/pricing/", "/"]
27
+ },
28
+ "/" => {
29
+ "asset_dependencies" => ["//use.typekit.net/wix0mlm.js", "/assets/css/style.css", "/assets/images/cover_create.jpg", "/assets/images/techcrunch.png"],
30
+ "links_to" => ["/company/careers/", "/pricing/", "/"],
31
+ "linked_to_from" => ["/features/", "/pricing/"]
32
+ },
33
+ "/pricing/" => {
34
+ "asset_dependencies" => ["/assets/css/style.css", "/assets/images/mashable.png"],
35
+ "links_to" => ["/company/careers/", "/features/", "/"],
36
+ "linked_to_from" => ["/features/", "/"]
37
+ },
38
+ "/features/" => {
39
+ "asset_dependencies" => ["/assets/images/venturebeat.png"],
40
+ "links_to" => ["/company/careers/", "/pricing/", "/"],
41
+ "linked_to_from" => ["/pricing/"]
42
+ }
43
+ }
44
+ }
45
+ end
46
+ end
@@ -0,0 +1,23 @@
1
+ require 'test_helper'
2
+
3
+ class DocumentTest < MiniTest::Test
4
+
5
+ def test_document_parsing
6
+ document = Crawler::Document.new('https://www.digitalocean.com/')
7
+
8
+ # Test link discovery
9
+ ['http://techcrunch.com/article/', 'https://cloud.digitalocean.com/login', '/company/careers/'].each do |link|
10
+ assert_includes document.links, link
11
+ end
12
+
13
+ # Test domain specific link discovery
14
+ assert_includes document.domain_specific_paths, '/company/careers/'
15
+
16
+ ['https://twitter.com/digitalocean', 'https://status.digitalocean.com/', '#'].each do |link|
17
+ refute_includes document.domain_specific_paths, link
18
+ end
19
+
20
+ # Test asset discovery
21
+ assert_includes document.static_assets, '/assets/images/techcrunch.png'
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'test_helper'
2
+ require 'crawler/formatting'
3
+
4
+ class FormattingTest < MiniTest::Test
5
+ include Crawler::Formatting
6
+
7
+ def test_normalize_path
8
+ path = '/'
9
+ assert_equal '/', normalize_path(path)
10
+
11
+ path = '/pricing'
12
+ assert_equal '/pricing/', normalize_path(path)
13
+
14
+ path = '/pricing/'
15
+ assert_equal '/pricing/', normalize_path(path)
16
+ end
17
+
18
+ def test_construct_url
19
+ url = 'https://www.digitalocean.com'
20
+ uri = URI.parse(url)
21
+ assert_equal url, construct_url(uri)
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'test_helper'
2
+
3
+ class HttpTest < MiniTest::Test
4
+ include Crawler::Http
5
+
6
+ def test_request
7
+ domain = 'https://www.digitalocean.com'
8
+ response = request domain
9
+ assert_equal 'SSD Cloud Server, VPS Server, Simple Cloud Hosting | DigitalOcean', Nokogiri::HTML(response).css('title').text
10
+ end
11
+
12
+ def test_request_follows_redirect
13
+ domain = 'https://www.digitalocean.com/redirect'
14
+ response = request domain
15
+ assert_equal 'SSD Cloud Server, VPS Server, Simple Cloud Hosting | DigitalOcean', Nokogiri::HTML(response).css('title').text
16
+ end
17
+
18
+ def test_bad_request_is_rescued
19
+ domain = 'https://www.digitalocean.com/bad_request'
20
+ response = request domain
21
+ assert_equal '', response
22
+ end
23
+ end
@@ -0,0 +1,78 @@
1
+ require 'test_helper'
2
+
3
+ class IndexTest < MiniTest::Test
4
+ include Crawler::Formatting
5
+
6
+ def test_initialization
7
+ domain = 'https://www.digitalocean.com'
8
+ index = Crawler::Index.new(Addressable::URI.parse(domain))
9
+
10
+ expected_results = {
11
+ 'domain' => 'www.digitalocean.com',
12
+ 'paths' => {}
13
+ }
14
+
15
+ assert_equal expected_results, index.results
16
+ end
17
+
18
+ def test_document_consumption
19
+ domain = "https://www.digitalocean.com"
20
+ document = Crawler::Document.new(domain)
21
+ uri = Addressable::URI.parse(domain)
22
+ index = Crawler::Index.new(uri)
23
+ index.consume_document uri.path, document
24
+
25
+ # Test domain is correct
26
+ assert_equal 'www.digitalocean.com', index.base_uri.hostname
27
+
28
+ # Test that urls / links are properly recorded
29
+ results = index.results
30
+
31
+ ["/company/careers/", "/", "/pricing/"].each do |key|
32
+ assert_includes results['paths'].keys, key
33
+ end
34
+
35
+ paths_to_visit = index.get_paths_to_visit
36
+ ["/company/careers/", "/pricing/"].each do |path|
37
+ assert_includes paths_to_visit, path
38
+ end
39
+
40
+ paths = index.get_stored_paths
41
+ ["/"].each do |path|
42
+ assert_includes paths, path
43
+ end
44
+
45
+ # Test that the assets are recorded
46
+ assets = index.get_path_assets('/')
47
+ ["/assets/images/techcrunch.png", "/assets/images/cover_create.jpg", "/assets/css/style.css"].each do |asset|
48
+ assert_includes assets, asset
49
+ end
50
+
51
+ # Test the indexed results
52
+ assert_equal expected_index_results, index.results
53
+ end
54
+
55
+ def expected_index_results
56
+ {
57
+ "domain" => "www.digitalocean.com",
58
+ "paths" => {
59
+ "/company/careers/" => {
60
+ "asset_dependencies" => [],
61
+ "links_to" => [],
62
+ "linked_to_from" => ["/"]
63
+ },
64
+ "/pricing/" => {
65
+ "asset_dependencies" => [],
66
+ "links_to" => [],
67
+ "linked_to_from"=>["/"]
68
+ },
69
+ "/" => {
70
+ "asset_dependencies" => ["//use.typekit.net/wix0mlm.js", "/assets/css/style.css", "/assets/images/cover_create.jpg", "/assets/images/techcrunch.png"],
71
+ "links_to" => ["/company/careers/", "/pricing/", "/"],
72
+ "linked_to_from" => []
73
+ }
74
+ }
75
+ }
76
+ end
77
+
78
+ end
@@ -0,0 +1,147 @@
1
+ require 'test_helper'
2
+
3
+ class StorageTest < MiniTest::Test
4
+
5
+ def test_store_path_and_stored_paths
6
+ path = '/pricing/'
7
+ index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
8
+ index.store_path path
9
+ assert_equal [path], index.get_stored_paths
10
+ end
11
+
12
+ def test_storing_path_assets
13
+ path = '/pricing/'
14
+ assets = ['asset1', 'asset2']
15
+ index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
16
+ index.store_path_assets(path, assets)
17
+
18
+ stored_assets = index.get_path_assets(path)
19
+ assets.each do |asset|
20
+ assert_includes stored_assets, asset
21
+ end
22
+ end
23
+
24
+ def test_storing_path_links_to
25
+ path = '/pricing/'
26
+ links = ['link1', 'link2']
27
+ index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
28
+ index.store_path_links_to(path, links)
29
+
30
+ stored_links = index.get_path_links_to(path)
31
+
32
+ links.each do |link|
33
+ assert_includes stored_links, link
34
+ end
35
+ end
36
+
37
+ def test_storing_path_linked_to_from
38
+ path = '/pricing/'
39
+ links = ['link1', 'link2']
40
+ index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
41
+ index.store_path_linked_to_from(path, links)
42
+
43
+ stored_links = index.get_path_linked_to_from(path)
44
+ links.each do |link|
45
+ assert_includes stored_links, link
46
+ end
47
+ end
48
+
49
+ def test_storing_paths_visited
50
+ path = '/pricing/'
51
+ index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
52
+ index.store_path_visited(path)
53
+ assert_equal [path], index.get_paths_visited
54
+ end
55
+
56
+ def test_queuing_paths
57
+ paths = ['/pricing/', '/features/']
58
+ index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
59
+ index.store_paths_to_visit(paths)
60
+ assert_equal paths.reverse, index.get_paths_to_visit
61
+ end
62
+
63
+ def test_get_domain_data
64
+ path = '/pricing/'
65
+ index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
66
+ index.store_path path
67
+
68
+ assets = ['asset1', 'asset2']
69
+ index.store_path_assets(path, assets)
70
+
71
+ links_to = ['link1', 'link2']
72
+ index.store_path_links_to(path, links_to)
73
+
74
+ linked_to_from = ['link3', 'link4']
75
+ index.store_path_linked_to_from(path, linked_to_from)
76
+
77
+ data = index.get_domain_data
78
+ assert_equal 'www.digitalocean.com', data['domain']
79
+
80
+ assets.each do |value|
81
+ assert_includes data['paths']['/pricing/']['asset_dependencies'], value
82
+ end
83
+
84
+ links_to.each do |value|
85
+ assert_includes data['paths']['/pricing/']['links_to'], value
86
+ end
87
+
88
+ linked_to_from.each do |value|
89
+ assert_includes data['paths']['/pricing/']['linked_to_from'], value
90
+ end
91
+ end
92
+
93
+ def test_get_path_data
94
+ path = '/pricing/'
95
+ index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
96
+ index.store_path path
97
+
98
+ assets = ['asset1', 'asset2']
99
+ index.store_path_assets(path, assets)
100
+
101
+ links_to = ['link1', 'link2']
102
+ index.store_path_links_to(path, links_to)
103
+
104
+ linked_to_from = ['link3', 'link4']
105
+ index.store_path_linked_to_from(path, linked_to_from)
106
+
107
+ data = index.get_path_data(path)
108
+
109
+ assets.each do |value|
110
+ assert_includes data['asset_dependencies'], value
111
+ end
112
+
113
+ links_to.each do |value|
114
+ assert_includes data['links_to'], value
115
+ end
116
+
117
+ linked_to_from.each do |value|
118
+ assert_includes data['linked_to_from'], value
119
+ end
120
+ end
121
+
122
+ def test_clear_stored_data
123
+ path = '/pricing/'
124
+ index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
125
+ index.store_path path
126
+
127
+ assets = ['asset1', 'asset2']
128
+ index.store_path_assets(path, assets)
129
+
130
+ links_to = ['link1', 'link2']
131
+ index.store_path_links_to(path, links_to)
132
+
133
+ linked_to_from = ['link3', 'link4']
134
+ index.store_path_linked_to_from(path, linked_to_from)
135
+
136
+ index.clear_stored_results
137
+
138
+ empty_results = {
139
+ 'asset_dependencies' => [],
140
+ 'links_to' => [],
141
+ 'linked_to_from' => []
142
+ }
143
+
144
+ assert_equal empty_results, index.get_path_data(path)
145
+ end
146
+
147
+ end
@@ -0,0 +1,10 @@
1
+ require 'test_helper'
2
+
3
+ class ValidationsTest < MiniTest::Test
4
+
5
+ def test_protocol_validation
6
+ error = assert_raises(Crawler::InvalidProtocolError) { Crawler.new('digitalocean.com') }
7
+ assert_equal 'Please specify either http or https', error.message
8
+ end
9
+
10
+ end
@@ -0,0 +1,256 @@
1
+ module DomainHtml
2
+
3
+ # Domain links: '/', '/company/careers/', '/pricing/'
4
+ # Subdomains: 'cloud.digitalocean.com/...'
5
+ # External links: 'techcrunch.com/...'
6
+ # Assets: /assets/images/techcrunch.png, /assets/images/cover_create.jpg, //use.typekit.net/wix0mlm.js, /assets/css/style.css
7
+ def root_html
8
+ '<!doctype html>
9
+ <html data-placeholder-focus="false">
10
+ <head>
11
+ <meta charset="utf-8">
12
+ <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
13
+ <title>SSD Cloud Server, VPS Server, Simple Cloud Hosting | DigitalOcean</title>
14
+ <meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
15
+ <meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
16
+ <meta name="viewport" content="width=1200, maximum-scale=1" />
17
+ <link href="/assets/css/style.css" media="screen" rel="stylesheet" type="text/css" />
18
+ <script type="text/javascript" src="//use.typekit.net/wix0mlm.js"></script>
19
+ <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
20
+ </head>
21
+ <body class="index">
22
+ <header>
23
+ <div class="wrapper-full">
24
+ <a class="logo" href="/">DigitalOcean</a>
25
+ <a id="hiring" href="/company/careers/">We\'re Hiring!</a>
26
+ <nav id="button-nav">
27
+ <ul>
28
+ <li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
29
+ <li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
30
+ </ul>
31
+ </nav>
32
+ <nav>
33
+ <ul>
34
+ <li><a class="" href="/pricing/">Pricing</a></li>
35
+ </ul>
36
+ </nav>
37
+ </div>
38
+ </header>
39
+ <section class="main">
40
+ <section id="homepage-features" class="homepage-section">
41
+ <div class="wrapper-full">
42
+ <h2>A control panel experience made simple.</h2>
43
+ <div id="browser">
44
+ <div id="last-vid">
45
+ <a href="#" class="restart standard-button">Replay</a>
46
+ </div>
47
+ <p class="url">https://cloud.digitalocean.com/droplets</p>
48
+ <video height="100%" class="feature-video" preload="none" poster="/assets/images/cover_create.jpg">
49
+ <source src="/assets/video/create.mp4" type="video/mp4">
50
+ <source src="/assets/video/create.webm" type="video/webm">
51
+ Your browser does not support the video tag.
52
+ </video>
53
+ </div>
54
+ <ul id="feature-controls">
55
+ <li class="create active" data-video-url="/assets/video/create.webm" data-url-bar="https://cloud.digitalocean.com/droplets">
56
+ <a href="#">
57
+ <div class="icon">
58
+ <span></span>
59
+ </div>
60
+ <h3>Create Droplets</h3>
61
+ <p>Setup and deploy your Droplet\'s configuration in a matter of seconds. </p>
62
+ </a>
63
+ </li>
64
+ </ul>
65
+ </div>
66
+ </section>
67
+ <section id="companies-using">
68
+ <div class="wrapper-full">
69
+ <h3>As Seen On</h3>
70
+ <ul>
71
+ <li><a href="http://techcrunch.com/article/"><img src="/assets/images/techcrunch.png"></a></li>
72
+ </ul>
73
+ </div>
74
+ </section>
75
+ <section class="call-to-action">
76
+ <div class="wrapper-full">
77
+ <h2>Deploy an SSD cloud server in 55 seconds.</h2>
78
+ <p>Sign up for DigitalOcean today.</p>
79
+ <a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
80
+ GET STARTED NOW <span></span>
81
+ </a>
82
+ </div>
83
+ </section>
84
+ </section>
85
+ </body>
86
+ </html>'
87
+ end
88
+
89
+ # Domain links: '/', '/company/careers/', '/features/'
90
+ # Subdomains: 'cloud.digitalocean.com/...'
91
+ # External links: 'mashable.com/...'
92
+ # Assets: /assets/css/style.css, /assets/images/mashable.png
93
+ def pricing_html
94
+ '<!doctype html>
95
+ <html data-placeholder-focus="false">
96
+ <head>
97
+ <meta charset="utf-8">
98
+ <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
99
+ <title>Pricing</title>
100
+ <meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
101
+ <meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
102
+ <meta name="viewport" content="width=1200, maximum-scale=1" />
103
+ <link href="/assets/css/style.css" media="screen" rel="stylesheet" type="text/css" />
104
+ <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
105
+ </head>
106
+ <body class="index">
107
+ <header>
108
+ <div class="wrapper-full">
109
+ <a class="logo" href="/">DigitalOcean</a>
110
+ <a id="hiring" href="/company/careers/">We\'re Hiring!</a>
111
+ <nav id="button-nav">
112
+ <ul>
113
+ <li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
114
+ <li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
115
+ </ul>
116
+ </nav>
117
+ <nav>
118
+ <ul>
119
+ <li><a class="" href="/features/">Features</a></li>
120
+ <li><a id="hiring" href="/company/careers/">We\'re Hiring!</a></li>
121
+ </ul>
122
+ </nav>
123
+ </div>
124
+ </header>
125
+ <section class="main">
126
+ <section id="homepage-features" class="homepage-section">
127
+ <div class="wrapper-full">
128
+ <h2>A control panel experience made simple.</h2>
129
+ <div id="browser">
130
+ <div id="last-vid">
131
+ <a href="#" class="restart standard-button">Replay</a>
132
+ </div>
133
+ <p class="url">https://cloud.digitalocean.com/droplets</p>
134
+ </div>
135
+ <ul id="feature-controls">
136
+ <li class="dns" data-video-url="/assets/video/dns.webm" data-url-bar="https://cloud.digitalocean.com/domains">
137
+ <a href="#">
138
+ <div class="icon">
139
+ <span></span>
140
+ </div>
141
+ <h3>DNS Management</h3>
142
+ <p>Full feature DNS management allows<br>you to easily manage your domains.</p>
143
+ </a>
144
+ </li>
145
+ </ul>
146
+ </div>
147
+ </section>
148
+ <section id="companies-using">
149
+ <div class="wrapper-full">
150
+ <h3>As Seen On</h3>
151
+ <ul>
152
+ <li><a href="http://mashable.com/article/"><img src="/assets/images/mashable.png"></a></li>
153
+ </ul>
154
+ </div>
155
+ </section>
156
+ <section class="call-to-action">
157
+ <div class="wrapper-full">
158
+ <h2>Deploy an SSD cloud server in 55 seconds.</h2>
159
+ <p>Sign up for DigitalOcean today.</p>
160
+ <a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
161
+ GET STARTED NOW <span></span>
162
+ </a>
163
+ </div>
164
+ </section>
165
+ </section>
166
+ </body>
167
+ </html>'
168
+ end
169
+
170
+ # Domain links: '/', '/company/careers/', '/pricing/'
171
+ # Subdomains: 'cloud.digitalocean.com/...'
172
+ # External links: 'venturebeat.com/...'
173
+ # Assets: /assets/images/venturebeat.png
174
+ def features_html
175
+ '<!doctype html>
176
+ <html data-placeholder-focus="false">
177
+ <head>
178
+ <meta charset="utf-8">
179
+ <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
180
+ <title>Features</title>
181
+ <meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
182
+ <meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
183
+ <meta name="viewport" content="width=1200, maximum-scale=1" />
184
+ <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
185
+ </head>
186
+ <body class="index">
187
+ <header>
188
+ <div class="wrapper-full">
189
+ <a class="logo" href="/">DigitalOcean</a>
190
+ <a id="hiring" href="/company/careers/">We\'re Hiring!</a>
191
+ <nav id="button-nav">
192
+ <ul>
193
+ <li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
194
+ <li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
195
+ </ul>
196
+ </nav>
197
+ <nav>
198
+ <ul>
199
+ <li><a class="" href="/pricing/">Pricing</a></li>
200
+ </ul>
201
+ </nav>
202
+ </div>
203
+ </header>
204
+ <section class="main">
205
+ <section id="homepage-features" class="homepage-section">
206
+ <div class="wrapper-full">
207
+ <h2>A control panel experience made simple.</h2>
208
+ <div id="browser">
209
+ <div id="last-vid">
210
+ <a href="#" class="restart standard-button">Replay</a>
211
+ </div>
212
+ </div>
213
+ <ul id="feature-controls">
214
+ <li class="backups" data-video-url="/assets/video/snapshot.webm" data-url-bar="https://cloud.digitalocean.com/images">
215
+ <a href="#">
216
+ <div class="icon">
217
+ <span></span>
218
+ </div>
219
+ <h3>Snapshots</h3>
220
+ <p>Build a new server from a snapshot<br>you took, saving you configuration time.</p>
221
+ </a>
222
+ </li>
223
+ </ul>
224
+ </div>
225
+ </section>
226
+ <section id="companies-using">
227
+ <div class="wrapper-full">
228
+ <h3>As Seen On</h3>
229
+ <ul>
230
+ <li><a href="http://venturebeat.com/article/"><img src="/assets/images/venturebeat.png"></a></li>
231
+ </ul>
232
+ </div>
233
+ </section>
234
+ <section class="call-to-action">
235
+ <div class="wrapper-full">
236
+ <h2>Deploy an SSD cloud server in 55 seconds.</h2>
237
+ <p>Sign up for DigitalOcean today.</p>
238
+ <a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
239
+ GET STARTED NOW <span></span>
240
+ </a>
241
+ </div>
242
+ </section>
243
+ </section>
244
+ </body>
245
+ </html>'
246
+ end
247
+
248
+ def careers_html
249
+ '<!doctype html>
250
+ <html data-placeholder-focus="false">
251
+ <body>
252
+ No substantial content
253
+ </body>
254
+ </html>'
255
+ end
256
+ end