ruby-crawler 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,12 @@
1
module Crawler
  # Mixin with sanity checks for the URL a crawl starts from.
  #
  # The including object must respond to +base_uri+ and return an object
  # that exposes its URI scheme through +scheme+.
  module Validations
    # Lower-case URI schemes the crawler accepts.
    VALID_PROTOCOLS = %w[http https].freeze

    # Validates protocol is http or https.
    #
    # URI schemes are case-insensitive, so the scheme is lower-cased before
    # it is compared; a missing scheme (nil) is treated as invalid.
    #
    # Returns nil when the scheme is accepted.
    # Raises InvalidProtocolError when the scheme is missing or is anything
    # other than http/https.
    def validate_protocol
      scheme = base_uri.scheme.to_s.downcase
      return if VALID_PROTOCOLS.include?(scheme)

      raise InvalidProtocolError, 'Please specify either http or https'
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the crawler library.
module Crawler
  # Version string of this release.
  VERSION = '0.0.1'
end
@@ -0,0 +1,46 @@
1
+ require 'test_helper'
2
+
3
# Tests for the top-level Crawler entry point: building a crawler from a URL
# and comparing the results of a full crawl against a fixed expectation.
#
# Inherits from Minitest::Test (the canonical constant) because the legacy
# MiniTest alias is not loaded by default in current Minitest releases.
class CrawlerTest < Minitest::Test
  # A freshly built crawler exposes the parsed base URI and the base URL.
  def test_initialization
    subject = Crawler.new('https://www.digitalocean.com')

    assert_equal 'www.digitalocean.com', subject.base_uri.hostname
    assert_equal 'https://www.digitalocean.com', subject.base_url
    assert_equal 'https', subject.base_uri.scheme
  end

  # After #crawl, #results must equal the complete expected result hash.
  def test_crawl
    subject = Crawler.new('https://www.digitalocean.com')
    subject.crawl

    assert_equal expected_crawl_results, subject.results
  end

  # Result hash expected by #test_crawl: one entry per path, each holding
  # the path's asset dependencies, outgoing links and incoming links.
  def expected_crawl_results
    {
      "domain" => "www.digitalocean.com",
      "paths" => {
        "/company/careers/" => {
          "asset_dependencies" => [],
          "links_to" => [],
          "linked_to_from" => ["/features/", "/pricing/", "/"]
        },
        "/" => {
          "asset_dependencies" => [
            "//use.typekit.net/wix0mlm.js",
            "/assets/css/style.css",
            "/assets/images/cover_create.jpg",
            "/assets/images/techcrunch.png"
          ],
          "links_to" => ["/company/careers/", "/pricing/", "/"],
          "linked_to_from" => ["/features/", "/pricing/"]
        },
        "/pricing/" => {
          "asset_dependencies" => ["/assets/css/style.css", "/assets/images/mashable.png"],
          "links_to" => ["/company/careers/", "/features/", "/"],
          "linked_to_from" => ["/features/", "/"]
        },
        "/features/" => {
          "asset_dependencies" => ["/assets/images/venturebeat.png"],
          "links_to" => ["/company/careers/", "/pricing/", "/"],
          "linked_to_from" => ["/pricing/"]
        }
      }
    }
  end
end
@@ -0,0 +1,23 @@
1
+ require 'test_helper'
2
+
3
# Tests for Crawler::Document: link discovery, filtering of links down to
# paths on the document's own domain, and static asset discovery.
#
# Inherits from Minitest::Test (the canonical constant) because the legacy
# MiniTest alias is not loaded by default in current Minitest releases.
class DocumentTest < Minitest::Test
  def test_document_parsing
    document = Crawler::Document.new('https://www.digitalocean.com/')

    # Link discovery: external, subdomain and same-domain links are all reported.
    discovered = [
      'http://techcrunch.com/article/',
      'https://cloud.digitalocean.com/login',
      '/company/careers/'
    ]
    discovered.each { |link| assert_includes document.links, link }

    # Domain-specific link discovery: same-domain paths are kept ...
    assert_includes document.domain_specific_paths, '/company/careers/'

    # ... while external hosts, subdomains and bare '#' anchors are excluded.
    excluded = ['https://twitter.com/digitalocean', 'https://status.digitalocean.com/', '#']
    excluded.each { |link| refute_includes document.domain_specific_paths, link }

    # Asset discovery
    assert_includes document.static_assets, '/assets/images/techcrunch.png'
  end
end
@@ -0,0 +1,23 @@
1
+ require 'test_helper'
2
+ require 'crawler/formatting'
3
+
4
# Tests for the Crawler::Formatting helpers, which are mixed into this test
# class so they can be called directly.
#
# Inherits from Minitest::Test (the canonical constant) because the legacy
# MiniTest alias is not loaded by default in current Minitest releases.
class FormattingTest < Minitest::Test
  include Crawler::Formatting

  # normalize_path leaves '/' and already-terminated paths untouched and
  # appends a trailing slash to a path that lacks one.
  def test_normalize_path
    cases = {
      '/' => '/',
      '/pricing' => '/pricing/',
      '/pricing/' => '/pricing/'
    }

    cases.each do |input, expected|
      assert_equal expected, normalize_path(input)
    end
  end

  # construct_url turns a parsed URI back into the URL string it came from.
  def test_construct_url
    url = 'https://www.digitalocean.com'

    assert_equal url, construct_url(URI.parse(url))
  end
end
@@ -0,0 +1,23 @@
1
+ require 'test_helper'
2
+
3
# Tests for Crawler::Http#request, which is mixed into this test class so it
# can be called directly.
#
# Inherits from Minitest::Test (the canonical constant) because the legacy
# MiniTest alias is not loaded by default in current Minitest releases.
class HttpTest < Minitest::Test
  include Crawler::Http

  # <title> text of the page both successful requests are expected to return.
  HOME_PAGE_TITLE = 'SSD Cloud Server, VPS Server, Simple Cloud Hosting | DigitalOcean'.freeze

  # A plain request returns the body of the requested page.
  def test_request
    body = request 'https://www.digitalocean.com'

    assert_equal HOME_PAGE_TITLE, page_title(body)
  end

  # A request to a redirecting URL returns the body of the redirect target.
  def test_request_follows_redirect
    body = request 'https://www.digitalocean.com/redirect'

    assert_equal HOME_PAGE_TITLE, page_title(body)
  end

  # A failing request yields an empty string instead of raising.
  def test_bad_request_is_rescued
    body = request 'https://www.digitalocean.com/bad_request'

    assert_equal '', body
  end

  private

  # Extracts the text of the <title> element from an HTML string.
  def page_title(html)
    Nokogiri::HTML(html).css('title').text
  end
end
@@ -0,0 +1,78 @@
1
+ require 'test_helper'
2
+
3
# Tests for Crawler::Index: its initial state and what it records after
# consuming a single document.
#
# Inherits from Minitest::Test (the canonical constant) because the legacy
# MiniTest alias is not loaded by default in current Minitest releases.
class IndexTest < Minitest::Test
  include Crawler::Formatting

  # A new index reports its domain and an empty set of paths.
  def test_initialization
    index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))

    assert_equal({ 'domain' => 'www.digitalocean.com', 'paths' => {} }, index.results)
  end

  # Consuming the root document records its links, queue entries and assets.
  def test_document_consumption
    domain = "https://www.digitalocean.com"
    document = Crawler::Document.new(domain)
    uri = Addressable::URI.parse(domain)
    index = Crawler::Index.new(uri)
    index.consume_document uri.path, document

    # The index keeps the domain it was built for.
    assert_equal 'www.digitalocean.com', index.base_uri.hostname

    # Every linked path, and the consumed path itself, appears in the results.
    recorded_paths = index.results['paths'].keys
    ["/company/careers/", "/", "/pricing/"].each do |key|
      assert_includes recorded_paths, key
    end

    # Newly discovered paths are queued for a later visit.
    queued = index.get_paths_to_visit
    ["/company/careers/", "/pricing/"].each do |path|
      assert_includes queued, path
    end

    # The consumed path is among the stored paths.
    assert_includes index.get_stored_paths, "/"

    # The document's assets are recorded against the consumed path.
    root_assets = index.get_path_assets('/')
    ["/assets/images/techcrunch.png", "/assets/images/cover_create.jpg", "/assets/css/style.css"].each do |asset|
      assert_includes root_assets, asset
    end

    # The complete result hash matches the fixed expectation.
    assert_equal expected_index_results, index.results
  end

  # Result hash expected by #test_document_consumption: only '/' has been
  # consumed, so the linked paths carry nothing but their incoming link.
  def expected_index_results
    {
      "domain" => "www.digitalocean.com",
      "paths" => {
        "/company/careers/" => {
          "asset_dependencies" => [],
          "links_to" => [],
          "linked_to_from" => ["/"]
        },
        "/pricing/" => {
          "asset_dependencies" => [],
          "links_to" => [],
          "linked_to_from" => ["/"]
        },
        "/" => {
          "asset_dependencies" => [
            "//use.typekit.net/wix0mlm.js",
            "/assets/css/style.css",
            "/assets/images/cover_create.jpg",
            "/assets/images/techcrunch.png"
          ],
          "links_to" => ["/company/careers/", "/pricing/", "/"],
          "linked_to_from" => []
        }
      }
    }
  end
end
@@ -0,0 +1,147 @@
1
+ require 'test_helper'
2
+
3
# Tests for the storage operations exposed by Crawler::Index: storing and
# reading back paths, per-path assets and links, the visit queue, aggregated
# data, and clearing stored results.
#
# Inherits from Minitest::Test (the canonical constant) because the legacy
# MiniTest alias is not loaded by default in current Minitest releases.
class StorageTest < Minitest::Test
  def test_store_path_and_stored_paths
    path = '/pricing/'
    index = new_index
    index.store_path path

    assert_equal [path], index.get_stored_paths
  end

  def test_storing_path_assets
    path = '/pricing/'
    assets = ['asset1', 'asset2']
    index = new_index
    index.store_path_assets(path, assets)

    stored = index.get_path_assets(path)
    assets.each { |asset| assert_includes stored, asset }
  end

  def test_storing_path_links_to
    path = '/pricing/'
    links = ['link1', 'link2']
    index = new_index
    index.store_path_links_to(path, links)

    stored = index.get_path_links_to(path)
    links.each { |link| assert_includes stored, link }
  end

  def test_storing_path_linked_to_from
    path = '/pricing/'
    links = ['link1', 'link2']
    index = new_index
    index.store_path_linked_to_from(path, links)

    stored = index.get_path_linked_to_from(path)
    links.each { |link| assert_includes stored, link }
  end

  def test_storing_paths_visited
    path = '/pricing/'
    index = new_index
    index.store_path_visited(path)

    assert_equal [path], index.get_paths_visited
  end

  # The visit queue is read back in the reverse of the order it was stored.
  def test_queuing_paths
    paths = ['/pricing/', '/features/']
    index = new_index
    index.store_paths_to_visit(paths)

    assert_equal paths.reverse, index.get_paths_to_visit
  end

  # get_domain_data aggregates the domain name and every stored path record.
  def test_get_domain_data
    path = '/pricing/'
    assets = ['asset1', 'asset2']
    links_to = ['link1', 'link2']
    linked_to_from = ['link3', 'link4']
    index = new_index
    seed_path(index, path, assets, links_to, linked_to_from)

    data = index.get_domain_data
    assert_equal 'www.digitalocean.com', data['domain']

    assets.each do |value|
      assert_includes data['paths']['/pricing/']['asset_dependencies'], value
    end

    links_to.each do |value|
      assert_includes data['paths']['/pricing/']['links_to'], value
    end

    linked_to_from.each do |value|
      assert_includes data['paths']['/pricing/']['linked_to_from'], value
    end
  end

  # get_path_data returns the three record kinds stored for a single path.
  def test_get_path_data
    path = '/pricing/'
    assets = ['asset1', 'asset2']
    links_to = ['link1', 'link2']
    linked_to_from = ['link3', 'link4']
    index = new_index
    seed_path(index, path, assets, links_to, linked_to_from)

    data = index.get_path_data(path)

    assets.each { |value| assert_includes data['asset_dependencies'], value }
    links_to.each { |value| assert_includes data['links_to'], value }
    linked_to_from.each { |value| assert_includes data['linked_to_from'], value }
  end

  # After clear_stored_results a previously populated path reads back empty.
  def test_clear_stored_data
    path = '/pricing/'
    index = new_index
    seed_path(index, path, ['asset1', 'asset2'], ['link1', 'link2'], ['link3', 'link4'])

    index.clear_stored_results

    empty_results = {
      'asset_dependencies' => [],
      'links_to' => [],
      'linked_to_from' => []
    }

    assert_equal empty_results, index.get_path_data(path)
  end

  private

  # Builds an index for the host used by every test in this class.
  def new_index
    Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
  end

  # Stores +path+ followed by its assets, outgoing links and incoming links,
  # in the same order the individual tests originally issued these calls.
  def seed_path(index, path, assets, links_to, linked_to_from)
    index.store_path path
    index.store_path_assets(path, assets)
    index.store_path_links_to(path, links_to)
    index.store_path_linked_to_from(path, linked_to_from)
  end
end
@@ -0,0 +1,10 @@
1
+ require 'test_helper'
2
+
3
# Tests the protocol validation performed when a crawler is constructed.
#
# Inherits from Minitest::Test (the canonical constant) because the legacy
# MiniTest alias is not loaded by default in current Minitest releases.
class ValidationsTest < Minitest::Test
  # A URL without an http/https scheme is rejected with a descriptive error.
  def test_protocol_validation
    error = assert_raises(Crawler::InvalidProtocolError) do
      Crawler.new('digitalocean.com')
    end

    assert_equal 'Please specify either http or https', error.message
  end
end
@@ -0,0 +1,256 @@
1
+ module DomainHtml
2
+
3
+ # Returns the HTML markup of the home page as a String. Domain links: '/', '/company/careers/', '/pricing/' (plus bare '#' anchors)
4
+ # Subdomain links: 'https://cloud.digitalocean.com/login', 'https://cloud.digitalocean.com/registrations/new'
5
+ # External links: 'http://techcrunch.com/article/'
6
+ # Assets: /assets/images/techcrunch.png, /assets/images/cover_create.jpg, //use.typekit.net/wix0mlm.js, /assets/css/style.css (the <source> tags also reference /assets/video/create.mp4 and /assets/video/create.webm)
7
+ def root_html
8
+ '<!doctype html>
9
+ <html data-placeholder-focus="false">
10
+ <head>
11
+ <meta charset="utf-8">
12
+ <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
13
+ <title>SSD Cloud Server, VPS Server, Simple Cloud Hosting | DigitalOcean</title>
14
+ <meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
15
+ <meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
16
+ <meta name="viewport" content="width=1200, maximum-scale=1" />
17
+ <link href="/assets/css/style.css" media="screen" rel="stylesheet" type="text/css" />
18
+ <script type="text/javascript" src="//use.typekit.net/wix0mlm.js"></script>
19
+ <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
20
+ </head>
21
+ <body class="index">
22
+ <header>
23
+ <div class="wrapper-full">
24
+ <a class="logo" href="/">DigitalOcean</a>
25
+ <a id="hiring" href="/company/careers/">We\'re Hiring!</a>
26
+ <nav id="button-nav">
27
+ <ul>
28
+ <li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
29
+ <li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
30
+ </ul>
31
+ </nav>
32
+ <nav>
33
+ <ul>
34
+ <li><a class="" href="/pricing/">Pricing</a></li>
35
+ </ul>
36
+ </nav>
37
+ </div>
38
+ </header>
39
+ <section class="main">
40
+ <section id="homepage-features" class="homepage-section">
41
+ <div class="wrapper-full">
42
+ <h2>A control panel experience made simple.</h2>
43
+ <div id="browser">
44
+ <div id="last-vid">
45
+ <a href="#" class="restart standard-button">Replay</a>
46
+ </div>
47
+ <p class="url">https://cloud.digitalocean.com/droplets</p>
48
+ <video height="100%" class="feature-video" preload="none" poster="/assets/images/cover_create.jpg">
49
+ <source src="/assets/video/create.mp4" type="video/mp4">
50
+ <source src="/assets/video/create.webm" type="video/webm">
51
+ Your browser does not support the video tag.
52
+ </video>
53
+ </div>
54
+ <ul id="feature-controls">
55
+ <li class="create active" data-video-url="/assets/video/create.webm" data-url-bar="https://cloud.digitalocean.com/droplets">
56
+ <a href="#">
57
+ <div class="icon">
58
+ <span></span>
59
+ </div>
60
+ <h3>Create Droplets</h3>
61
+ <p>Setup and deploy your Droplet\'s configuration in a matter of seconds. </p>
62
+ </a>
63
+ </li>
64
+ </ul>
65
+ </div>
66
+ </section>
67
+ <section id="companies-using">
68
+ <div class="wrapper-full">
69
+ <h3>As Seen On</h3>
70
+ <ul>
71
+ <li><a href="http://techcrunch.com/article/"><img src="/assets/images/techcrunch.png"></a></li>
72
+ </ul>
73
+ </div>
74
+ </section>
75
+ <section class="call-to-action">
76
+ <div class="wrapper-full">
77
+ <h2>Deploy an SSD cloud server in 55 seconds.</h2>
78
+ <p>Sign up for DigitalOcean today.</p>
79
+ <a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
80
+ GET STARTED NOW <span></span>
81
+ </a>
82
+ </div>
83
+ </section>
84
+ </section>
85
+ </body>
86
+ </html>'
87
+ end
88
+
89
+ # Returns the HTML markup of the page titled "Pricing" as a String. Domain links: '/', '/company/careers/', '/features/' (plus bare '#' anchors)
90
+ # Subdomain links: 'https://cloud.digitalocean.com/login', 'https://cloud.digitalocean.com/registrations/new'
91
+ # External links: 'http://mashable.com/article/'
92
+ # Assets: /assets/css/style.css, /assets/images/mashable.png
93
+ def pricing_html
94
+ '<!doctype html>
95
+ <html data-placeholder-focus="false">
96
+ <head>
97
+ <meta charset="utf-8">
98
+ <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
99
+ <title>Pricing</title>
100
+ <meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
101
+ <meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
102
+ <meta name="viewport" content="width=1200, maximum-scale=1" />
103
+ <link href="/assets/css/style.css" media="screen" rel="stylesheet" type="text/css" />
104
+ <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
105
+ </head>
106
+ <body class="index">
107
+ <header>
108
+ <div class="wrapper-full">
109
+ <a class="logo" href="/">DigitalOcean</a>
110
+ <a id="hiring" href="/company/careers/">We\'re Hiring!</a>
111
+ <nav id="button-nav">
112
+ <ul>
113
+ <li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
114
+ <li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
115
+ </ul>
116
+ </nav>
117
+ <nav>
118
+ <ul>
119
+ <li><a class="" href="/features/">Features</a></li>
120
+ <li><a id="hiring" href="/company/careers/">We\'re Hiring!</a></li>
121
+ </ul>
122
+ </nav>
123
+ </div>
124
+ </header>
125
+ <section class="main">
126
+ <section id="homepage-features" class="homepage-section">
127
+ <div class="wrapper-full">
128
+ <h2>A control panel experience made simple.</h2>
129
+ <div id="browser">
130
+ <div id="last-vid">
131
+ <a href="#" class="restart standard-button">Replay</a>
132
+ </div>
133
+ <p class="url">https://cloud.digitalocean.com/droplets</p>
134
+ </div>
135
+ <ul id="feature-controls">
136
+ <li class="dns" data-video-url="/assets/video/dns.webm" data-url-bar="https://cloud.digitalocean.com/domains">
137
+ <a href="#">
138
+ <div class="icon">
139
+ <span></span>
140
+ </div>
141
+ <h3>DNS Management</h3>
142
+ <p>Full feature DNS management allows<br>you to easily manage your domains.</p>
143
+ </a>
144
+ </li>
145
+ </ul>
146
+ </div>
147
+ </section>
148
+ <section id="companies-using">
149
+ <div class="wrapper-full">
150
+ <h3>As Seen On</h3>
151
+ <ul>
152
+ <li><a href="http://mashable.com/article/"><img src="/assets/images/mashable.png"></a></li>
153
+ </ul>
154
+ </div>
155
+ </section>
156
+ <section class="call-to-action">
157
+ <div class="wrapper-full">
158
+ <h2>Deploy an SSD cloud server in 55 seconds.</h2>
159
+ <p>Sign up for DigitalOcean today.</p>
160
+ <a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
161
+ GET STARTED NOW <span></span>
162
+ </a>
163
+ </div>
164
+ </section>
165
+ </section>
166
+ </body>
167
+ </html>'
168
+ end
169
+
170
+ # Returns the HTML markup of the page titled "Features" as a String. Domain links: '/', '/company/careers/', '/pricing/' (plus bare '#' anchors)
171
+ # Subdomain links: 'https://cloud.digitalocean.com/login', 'https://cloud.digitalocean.com/registrations/new'
172
+ # External links: 'http://venturebeat.com/article/'
173
+ # Assets: /assets/images/venturebeat.png
174
+ def features_html
175
+ '<!doctype html>
176
+ <html data-placeholder-focus="false">
177
+ <head>
178
+ <meta charset="utf-8">
179
+ <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
180
+ <title>Features</title>
181
+ <meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
182
+ <meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
183
+ <meta name="viewport" content="width=1200, maximum-scale=1" />
184
+ <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
185
+ </head>
186
+ <body class="index">
187
+ <header>
188
+ <div class="wrapper-full">
189
+ <a class="logo" href="/">DigitalOcean</a>
190
+ <a id="hiring" href="/company/careers/">We\'re Hiring!</a>
191
+ <nav id="button-nav">
192
+ <ul>
193
+ <li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
194
+ <li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
195
+ </ul>
196
+ </nav>
197
+ <nav>
198
+ <ul>
199
+ <li><a class="" href="/pricing/">Pricing</a></li>
200
+ </ul>
201
+ </nav>
202
+ </div>
203
+ </header>
204
+ <section class="main">
205
+ <section id="homepage-features" class="homepage-section">
206
+ <div class="wrapper-full">
207
+ <h2>A control panel experience made simple.</h2>
208
+ <div id="browser">
209
+ <div id="last-vid">
210
+ <a href="#" class="restart standard-button">Replay</a>
211
+ </div>
212
+ </div>
213
+ <ul id="feature-controls">
214
+ <li class="backups" data-video-url="/assets/video/snapshot.webm" data-url-bar="https://cloud.digitalocean.com/images">
215
+ <a href="#">
216
+ <div class="icon">
217
+ <span></span>
218
+ </div>
219
+ <h3>Snapshots</h3>
220
+ <p>Build a new server from a snapshot<br>you took, saving you configuration time.</p>
221
+ </a>
222
+ </li>
223
+ </ul>
224
+ </div>
225
+ </section>
226
+ <section id="companies-using">
227
+ <div class="wrapper-full">
228
+ <h3>As Seen On</h3>
229
+ <ul>
230
+ <li><a href="http://venturebeat.com/article/"><img src="/assets/images/venturebeat.png"></a></li>
231
+ </ul>
232
+ </div>
233
+ </section>
234
+ <section class="call-to-action">
235
+ <div class="wrapper-full">
236
+ <h2>Deploy an SSD cloud server in 55 seconds.</h2>
237
+ <p>Sign up for DigitalOcean today.</p>
238
+ <a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
239
+ GET STARTED NOW <span></span>
240
+ </a>
241
+ </div>
242
+ </section>
243
+ </section>
244
+ </body>
245
+ </html>'
246
+ end
247
+
248
+ # Returns, as a String, the HTML markup of a minimal page whose body holds only placeholder text: no links and no assets (presumably the careers page, going by the method name).
+ def careers_html
249
+ '<!doctype html>
250
+ <html data-placeholder-focus="false">
251
+ <body>
252
+ No substantial content
253
+ </body>
254
+ </html>'
255
+ end
256
+ end