ruby-crawler 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +19 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +39 -0
- data/Rakefile +11 -0
- data/crawler.gemspec +30 -0
- data/lib/crawler.rb +59 -0
- data/lib/crawler/document.rb +33 -0
- data/lib/crawler/document_parser.rb +45 -0
- data/lib/crawler/error.rb +10 -0
- data/lib/crawler/formatting.rb +16 -0
- data/lib/crawler/http.rb +17 -0
- data/lib/crawler/index.rb +63 -0
- data/lib/crawler/storage.rb +166 -0
- data/lib/crawler/validations.rb +12 -0
- data/lib/crawler/version.rb +3 -0
- data/test/crawler/crawler_test.rb +46 -0
- data/test/crawler/document_test.rb +23 -0
- data/test/crawler/formatting_test.rb +23 -0
- data/test/crawler/http_test.rb +23 -0
- data/test/crawler/index_test.rb +78 -0
- data/test/crawler/storage_test.rb +147 -0
- data/test/crawler/validations_test.rb +10 -0
- data/test/support/domain_html.rb +256 -0
- data/test/test_helper.rb +50 -0
- metadata +191 -0
@@ -0,0 +1,12 @@
|
|
1
|
+
module Crawler
  # Mixin with URL sanity checks. The including object must respond to
  # +base_uri+, returning an object that exposes +scheme+.
  module Validations
    # Schemes accepted by #validate_protocol.
    VALID_PROTOCOLS = ['http', 'https'].freeze

    # Validates that the scheme of +base_uri+ is http or https.
    #
    # Returns nil when the scheme is accepted.
    # Raises InvalidProtocolError when the scheme is missing or is anything
    # other than the entries in VALID_PROTOCOLS.
    def validate_protocol
      return if VALID_PROTOCOLS.include?(base_uri.scheme)

      raise InvalidProtocolError, 'Please specify either http or https'
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# End-to-end tests for the top-level Crawler object.
#
# NOTE(review): these tests reference www.digitalocean.com; presumably the
# HTTP layer is stubbed by test_helper -- confirm before running offline.
class CrawlerTest < Minitest::Test
  # A new crawler exposes the parsed base URI and the base URL string.
  def test_initialization
    crawler = Crawler.new('https://www.digitalocean.com')
    assert_equal 'www.digitalocean.com', crawler.base_uri.hostname
    assert_equal 'https://www.digitalocean.com', crawler.base_url
    assert_equal 'https', crawler.base_uri.scheme
  end

  # After a crawl, #results must equal the full expected site map.
  def test_crawl
    crawler = Crawler.new('https://www.digitalocean.com')
    crawler.crawl
    results = crawler.results
    assert_equal expected_crawl_results, results
  end

  # Expected #results hash after crawling: one entry per path with its
  # assets, outgoing links and incoming links.
  def expected_crawl_results
    {
      'domain' => 'www.digitalocean.com',
      'paths' => {
        '/company/careers/' => {
          'asset_dependencies' => [],
          'links_to' => [],
          'linked_to_from' => ['/features/', '/pricing/', '/']
        },
        '/' => {
          'asset_dependencies' => ['//use.typekit.net/wix0mlm.js', '/assets/css/style.css', '/assets/images/cover_create.jpg', '/assets/images/techcrunch.png'],
          'links_to' => ['/company/careers/', '/pricing/', '/'],
          'linked_to_from' => ['/features/', '/pricing/']
        },
        '/pricing/' => {
          'asset_dependencies' => ['/assets/css/style.css', '/assets/images/mashable.png'],
          'links_to' => ['/company/careers/', '/features/', '/'],
          'linked_to_from' => ['/features/', '/']
        },
        '/features/' => {
          'asset_dependencies' => ['/assets/images/venturebeat.png'],
          'links_to' => ['/company/careers/', '/pricing/', '/'],
          'linked_to_from' => ['/pricing/']
        }
      }
    }
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Tests for Crawler::Document: link discovery, same-domain path filtering
# and static asset discovery.
class DocumentTest < Minitest::Test
  def test_document_parsing
    document = Crawler::Document.new('https://www.digitalocean.com/')

    # Test link discovery: external, subdomain and relative links are all listed
    ['http://techcrunch.com/article/', 'https://cloud.digitalocean.com/login', '/company/careers/'].each do |link|
      assert_includes document.links, link
    end

    # Test domain specific link discovery
    assert_includes document.domain_specific_paths, '/company/careers/'

    # Other hosts, subdomains and bare fragments must be filtered out
    ['https://twitter.com/digitalocean', 'https://status.digitalocean.com/', '#'].each do |link|
      refute_includes document.domain_specific_paths, link
    end

    # Test asset discovery
    assert_includes document.static_assets, '/assets/images/techcrunch.png'
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'crawler/formatting'
|
3
|
+
|
4
|
+
# Tests for the Crawler::Formatting helpers, mixed into the test case so
# they can be called directly.
class FormattingTest < Minitest::Test
  include Crawler::Formatting

  # normalize_path keeps '/' as is and guarantees a trailing slash otherwise.
  def test_normalize_path
    {
      '/' => '/',
      '/pricing' => '/pricing/',
      '/pricing/' => '/pricing/'
    }.each do |path, expected|
      assert_equal expected, normalize_path(path)
    end
  end

  # construct_url turns a parsed URI back into the original URL string.
  def test_construct_url
    url = 'https://www.digitalocean.com'
    uri = URI.parse(url)
    assert_equal url, construct_url(uri)
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Tests for Crawler::Http#request, mixed into the test case.
#
# NOTE(review): the URLs below are presumably stubbed by test_helper
# (including /redirect and /bad_request) -- confirm against the stubs.
class HttpTest < Minitest::Test
  include Crawler::Http

  # <title> text of the home page that successful requests should return.
  HOME_TITLE = 'SSD Cloud Server, VPS Server, Simple Cloud Hosting | DigitalOcean'.freeze

  def test_request
    response = request 'https://www.digitalocean.com'
    assert_equal HOME_TITLE, title_of(response)
  end

  # A redirecting URL must yield the body of the page it redirects to.
  def test_request_follows_redirect
    response = request 'https://www.digitalocean.com/redirect'
    assert_equal HOME_TITLE, title_of(response)
  end

  # A failing request must not raise; it yields an empty string instead.
  def test_bad_request_is_rescued
    response = request 'https://www.digitalocean.com/bad_request'
    assert_equal '', response
  end

  private

  # Extracts the <title> text from an HTML string.
  def title_of(html)
    Nokogiri::HTML(html).css('title').text
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Tests for Crawler::Index: initial state and the bookkeeping performed
# when a parsed document is consumed.
class IndexTest < Minitest::Test
  include Crawler::Formatting

  # A fresh index knows its domain and has no paths recorded.
  def test_initialization
    domain = 'https://www.digitalocean.com'
    index = Crawler::Index.new(Addressable::URI.parse(domain))

    expected_results = {
      'domain' => 'www.digitalocean.com',
      'paths' => {}
    }

    assert_equal expected_results, index.results
  end

  # Consuming the root document records its links, queue and assets.
  def test_document_consumption
    domain = 'https://www.digitalocean.com'
    document = Crawler::Document.new(domain)
    uri = Addressable::URI.parse(domain)
    index = Crawler::Index.new(uri)
    index.consume_document uri.path, document

    # Test domain is correct
    assert_equal 'www.digitalocean.com', index.base_uri.hostname

    # Test that urls / links are properly recorded
    results = index.results

    ['/company/careers/', '/', '/pricing/'].each do |key|
      assert_includes results['paths'].keys, key
    end

    # Newly discovered paths are queued for a later visit
    paths_to_visit = index.get_paths_to_visit
    ['/company/careers/', '/pricing/'].each do |path|
      assert_includes paths_to_visit, path
    end

    # The consumed path itself is stored
    paths = index.get_stored_paths
    ['/'].each do |path|
      assert_includes paths, path
    end

    # Test that the assets are recorded
    assets = index.get_path_assets('/')
    ['/assets/images/techcrunch.png', '/assets/images/cover_create.jpg', '/assets/css/style.css'].each do |asset|
      assert_includes assets, asset
    end

    # Test the indexed results
    assert_equal expected_index_results, index.results
  end

  # Expected #results after consuming only the root document: linked pages
  # exist with a back-reference to '/' but have no data of their own yet.
  def expected_index_results
    {
      'domain' => 'www.digitalocean.com',
      'paths' => {
        '/company/careers/' => {
          'asset_dependencies' => [],
          'links_to' => [],
          'linked_to_from' => ['/']
        },
        '/pricing/' => {
          'asset_dependencies' => [],
          'links_to' => [],
          'linked_to_from' => ['/']
        },
        '/' => {
          'asset_dependencies' => ['//use.typekit.net/wix0mlm.js', '/assets/css/style.css', '/assets/images/cover_create.jpg', '/assets/images/techcrunch.png'],
          'links_to' => ['/company/careers/', '/pricing/', '/'],
          'linked_to_from' => []
        }
      }
    }
  end
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Tests for the storage API exposed through Crawler::Index: storing and
# reading back paths, assets, link relations, visit bookkeeping, and
# clearing stored results.
class StorageTest < Minitest::Test
  def test_store_path_and_stored_paths
    path = '/pricing/'
    index = build_index
    index.store_path path
    assert_equal [path], index.get_stored_paths
  end

  def test_storing_path_assets
    path = '/pricing/'
    assets = ['asset1', 'asset2']
    index = build_index
    index.store_path_assets(path, assets)

    stored_assets = index.get_path_assets(path)
    assets.each do |asset|
      assert_includes stored_assets, asset
    end
  end

  def test_storing_path_links_to
    path = '/pricing/'
    links = ['link1', 'link2']
    index = build_index
    index.store_path_links_to(path, links)

    stored_links = index.get_path_links_to(path)

    links.each do |link|
      assert_includes stored_links, link
    end
  end

  def test_storing_path_linked_to_from
    path = '/pricing/'
    links = ['link1', 'link2']
    index = build_index
    index.store_path_linked_to_from(path, links)

    stored_links = index.get_path_linked_to_from(path)
    links.each do |link|
      assert_includes stored_links, link
    end
  end

  def test_storing_paths_visited
    path = '/pricing/'
    index = build_index
    index.store_path_visited(path)
    assert_equal [path], index.get_paths_visited
  end

  # The queue is expected to come back in reverse insertion order.
  def test_queuing_paths
    paths = ['/pricing/', '/features/']
    index = build_index
    index.store_paths_to_visit(paths)
    assert_equal paths.reverse, index.get_paths_to_visit
  end

  def test_get_domain_data
    path = '/pricing/'
    index = build_index
    assets, links_to, linked_to_from = store_sample_path_data(index, path)

    data = index.get_domain_data
    assert_equal 'www.digitalocean.com', data['domain']

    assets.each do |value|
      assert_includes data['paths']['/pricing/']['asset_dependencies'], value
    end

    links_to.each do |value|
      assert_includes data['paths']['/pricing/']['links_to'], value
    end

    linked_to_from.each do |value|
      assert_includes data['paths']['/pricing/']['linked_to_from'], value
    end
  end

  def test_get_path_data
    path = '/pricing/'
    index = build_index
    assets, links_to, linked_to_from = store_sample_path_data(index, path)

    data = index.get_path_data(path)

    assets.each do |value|
      assert_includes data['asset_dependencies'], value
    end

    links_to.each do |value|
      assert_includes data['links_to'], value
    end

    linked_to_from.each do |value|
      assert_includes data['linked_to_from'], value
    end
  end

  # After clearing, a previously populated path reads back as empty lists.
  def test_clear_stored_data
    path = '/pricing/'
    index = build_index
    store_sample_path_data(index, path)

    index.clear_stored_results

    empty_results = {
      'asset_dependencies' => [],
      'links_to' => [],
      'linked_to_from' => []
    }

    assert_equal empty_results, index.get_path_data(path)
  end

  private

  # Builds a fresh index for the domain used by every test in this class.
  def build_index
    Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
  end

  # Stores +path+ together with sample assets and link relations, in the
  # order: path, assets, links_to, linked_to_from.
  #
  # Returns the three sample arrays as [assets, links_to, linked_to_from]
  # so callers can assert against exactly what was stored.
  def store_sample_path_data(index, path)
    assets = ['asset1', 'asset2']
    links_to = ['link1', 'link2']
    linked_to_from = ['link3', 'link4']

    index.store_path path
    index.store_path_assets(path, assets)
    index.store_path_links_to(path, links_to)
    index.store_path_linked_to_from(path, linked_to_from)

    [assets, links_to, linked_to_from]
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Tests that constructing a Crawler enforces the http/https requirement.
class ValidationsTest < Minitest::Test
  # A URL without a scheme must be rejected with the documented message.
  def test_protocol_validation
    error = assert_raises(Crawler::InvalidProtocolError) { Crawler.new('digitalocean.com') }
    assert_equal 'Please specify either http or https', error.message
  end
end
|
@@ -0,0 +1,256 @@
|
|
1
|
+
# Static HTML fixtures for a small four-page site ('/', '/pricing/',
# '/features/' and '/company/careers/'). Each method returns one page as a
# String.
#
# NOTE(review): presumably these are served by HTTP stubs set up in
# test_helper -- confirm against that file.
module DomainHtml

  # Home page.
  #
  # Domain links: '/', '/company/careers/', '/pricing/'
  # Subdomains: 'cloud.digitalocean.com/...'
  # External links: 'techcrunch/...'
  # Assets: /assets/images/techcrunch.png, /assets/images/cover_create.jpg, //use.typekit.net/wix0mlm.js, /assets/css/style.css
  def root_html
    '<!doctype html>
    <html data-placeholder-focus="false">
      <head>
        <meta charset="utf-8">
        <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
        <title>SSD Cloud Server, VPS Server, Simple Cloud Hosting | DigitalOcean</title>
        <meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
        <meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
        <meta name="viewport" content="width=1200, maximum-scale=1" />
        <link href="/assets/css/style.css" media="screen" rel="stylesheet" type="text/css" />
        <script type="text/javascript" src="//use.typekit.net/wix0mlm.js"></script>
        <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
      </head>
      <body class="index">
        <header>
          <div class="wrapper-full">
            <a class="logo" href="/">DigitalOcean</a>
            <a id="hiring" href="/company/careers/">We\'re Hiring!</a>
            <nav id="button-nav">
              <ul>
                <li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
                <li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
              </ul>
            </nav>
            <nav>
              <ul>
                <li><a class="" href="/pricing/">Pricing</a></li>
              </ul>
            </nav>
          </div>
        </header>
        <section class="main">
          <section id="homepage-features" class="homepage-section">
            <div class="wrapper-full">
              <h2>A control panel experience made simple.</h2>
              <div id="browser">
                <div id="last-vid">
                  <a href="#" class="restart standard-button">Replay</a>
                </div>
                <p class="url">https://cloud.digitalocean.com/droplets</p>
                <video height="100%" class="feature-video" preload="none" poster="/assets/images/cover_create.jpg">
                  <source src="/assets/video/create.mp4" type="video/mp4">
                  <source src="/assets/video/create.webm" type="video/webm">
                  Your browser does not support the video tag.
                </video>
              </div>
              <ul id="feature-controls">
                <li class="create active" data-video-url="/assets/video/create.webm" data-url-bar="https://cloud.digitalocean.com/droplets">
                  <a href="#">
                    <div class="icon">
                      <span></span>
                    </div>
                    <h3>Create Droplets</h3>
                    <p>Setup and deploy your Droplet\'s configuration in a matter of seconds. </p>
                  </a>
                </li>
              </ul>
            </div>
          </section>
          <section id="companies-using">
            <div class="wrapper-full">
              <h3>As Seen On</h3>
              <ul>
                <li><a href="http://techcrunch.com/article/"><img src="/assets/images/techcrunch.png"></a></li>
              </ul>
            </div>
          </section>
          <section class="call-to-action">
            <div class="wrapper-full">
              <h2>Deploy an SSD cloud server in 55 seconds.</h2>
              <p>Sign up for DigitalOcean today.</p>
              <a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
                GET STARTED NOW <span></span>
              </a>
            </div>
          </section>
        </section>
      </body>
    </html>'
  end

  # Pricing page.
  #
  # Domain links: '/', '/company/careers/', '/features/'
  # Subdomains: 'cloud.digitalocean.com/...'
  # External links: 'mashable/...'
  # Assets: /assets/css/style.css, /assets/images/mashable.png
  def pricing_html
    '<!doctype html>
    <html data-placeholder-focus="false">
      <head>
        <meta charset="utf-8">
        <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
        <title>Pricing</title>
        <meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
        <meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
        <meta name="viewport" content="width=1200, maximum-scale=1" />
        <link href="/assets/css/style.css" media="screen" rel="stylesheet" type="text/css" />
        <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
      </head>
      <body class="index">
        <header>
          <div class="wrapper-full">
            <a class="logo" href="/">DigitalOcean</a>
            <a id="hiring" href="/company/careers/">We\'re Hiring!</a>
            <nav id="button-nav">
              <ul>
                <li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
                <li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
              </ul>
            </nav>
            <nav>
              <ul>
                <li><a class="" href="/features/">Features</a></li>
                <li><a id="hiring" href="/company/careers/">We\'re Hiring!</a></li>
              </ul>
            </nav>
          </div>
        </header>
        <section class="main">
          <section id="homepage-features" class="homepage-section">
            <div class="wrapper-full">
              <h2>A control panel experience made simple.</h2>
              <div id="browser">
                <div id="last-vid">
                  <a href="#" class="restart standard-button">Replay</a>
                </div>
                <p class="url">https://cloud.digitalocean.com/droplets</p>
              </div>
              <ul id="feature-controls">
                <li class="dns" data-video-url="/assets/video/dns.webm" data-url-bar="https://cloud.digitalocean.com/domains">
                  <a href="#">
                    <div class="icon">
                      <span></span>
                    </div>
                    <h3>DNS Management</h3>
                    <p>Full feature DNS management allows<br>you to easily manage your domains.</p>
                  </a>
                </li>
              </ul>
            </div>
          </section>
          <section id="companies-using">
            <div class="wrapper-full">
              <h3>As Seen On</h3>
              <ul>
                <li><a href="http://mashable.com/article/"><img src="/assets/images/mashable.png"></a></li>
              </ul>
            </div>
          </section>
          <section class="call-to-action">
            <div class="wrapper-full">
              <h2>Deploy an SSD cloud server in 55 seconds.</h2>
              <p>Sign up for DigitalOcean today.</p>
              <a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
                GET STARTED NOW <span></span>
              </a>
            </div>
          </section>
        </section>
      </body>
    </html>'
  end

  # Features page.
  #
  # Domain links: '/', '/company/careers/', '/pricing/'
  # Subdomains: 'cloud.digitalocean.com/...'
  # External links: 'venturebeat/...'
  # Assets: /assets/images/venturebeat.png
  def features_html
    '<!doctype html>
    <html data-placeholder-focus="false">
      <head>
        <meta charset="utf-8">
        <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
        <title>Features</title>
        <meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
        <meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
        <meta name="viewport" content="width=1200, maximum-scale=1" />
        <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
      </head>
      <body class="index">
        <header>
          <div class="wrapper-full">
            <a class="logo" href="/">DigitalOcean</a>
            <a id="hiring" href="/company/careers/">We\'re Hiring!</a>
            <nav id="button-nav">
              <ul>
                <li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
                <li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
              </ul>
            </nav>
            <nav>
              <ul>
                <li><a class="" href="/pricing/">Pricing</a></li>
              </ul>
            </nav>
          </div>
        </header>
        <section class="main">
          <section id="homepage-features" class="homepage-section">
            <div class="wrapper-full">
              <h2>A control panel experience made simple.</h2>
              <div id="browser">
                <div id="last-vid">
                  <a href="#" class="restart standard-button">Replay</a>
                </div>
              </div>
              <ul id="feature-controls">
                <li class="backups" data-video-url="/assets/video/snapshot.webm" data-url-bar="https://cloud.digitalocean.com/images">
                  <a href="#">
                    <div class="icon">
                      <span></span>
                    </div>
                    <h3>Snapshots</h3>
                    <p>Build a new server from a snapshot<br>you took, saving you configuration time.</p>
                  </a>
                </li>
              </ul>
            </div>
          </section>
          <section id="companies-using">
            <div class="wrapper-full">
              <h3>As Seen On</h3>
              <ul>
                <li><a href="http://venturebeat.com/article/"><img src="/assets/images/venturebeat.png"></a></li>
              </ul>
            </div>
          </section>
          <section class="call-to-action">
            <div class="wrapper-full">
              <h2>Deploy an SSD cloud server in 55 seconds.</h2>
              <p>Sign up for DigitalOcean today.</p>
              <a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
                GET STARTED NOW <span></span>
              </a>
            </div>
          </section>
        </section>
      </body>
    </html>'
  end

  # Careers page: a bare document with no links and no assets.
  def careers_html
    '<!doctype html>
    <html data-placeholder-focus="false">
      <body>
        No substantial content
      </body>
    </html>'
  end
end
|