ruby-crawler 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +19 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +39 -0
- data/Rakefile +11 -0
- data/crawler.gemspec +30 -0
- data/lib/crawler.rb +59 -0
- data/lib/crawler/document.rb +33 -0
- data/lib/crawler/document_parser.rb +45 -0
- data/lib/crawler/error.rb +10 -0
- data/lib/crawler/formatting.rb +16 -0
- data/lib/crawler/http.rb +17 -0
- data/lib/crawler/index.rb +63 -0
- data/lib/crawler/storage.rb +166 -0
- data/lib/crawler/validations.rb +12 -0
- data/lib/crawler/version.rb +3 -0
- data/test/crawler/crawler_test.rb +46 -0
- data/test/crawler/document_test.rb +23 -0
- data/test/crawler/formatting_test.rb +23 -0
- data/test/crawler/http_test.rb +23 -0
- data/test/crawler/index_test.rb +78 -0
- data/test/crawler/storage_test.rb +147 -0
- data/test/crawler/validations_test.rb +10 -0
- data/test/support/domain_html.rb +256 -0
- data/test/test_helper.rb +50 -0
- metadata +191 -0
@@ -0,0 +1,12 @@
|
|
1
|
+
module Crawler
|
2
|
+
module Validations
|
3
|
+
|
4
|
+
VALID_PROTOCOLS = ['http', 'https'].freeze
|
5
|
+
|
6
|
+
# Validates that the base URI's scheme is http or https; raises InvalidProtocolError otherwise
|
7
|
+
#
|
8
|
+
def validate_protocol
|
9
|
+
raise InvalidProtocolError.new('Please specify either http or https') unless VALID_PROTOCOLS.include? base_uri.scheme
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class CrawlerTest < MiniTest::Test
|
4
|
+
|
5
|
+
def test_initialization
|
6
|
+
crawler = Crawler.new('https://www.digitalocean.com')
|
7
|
+
assert_equal 'www.digitalocean.com', crawler.base_uri.hostname
|
8
|
+
assert_equal 'https://www.digitalocean.com', crawler.base_url
|
9
|
+
assert_equal 'https', crawler.base_uri.scheme
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_crawl
|
13
|
+
crawler = Crawler.new('https://www.digitalocean.com')
|
14
|
+
crawler.crawl
|
15
|
+
results = crawler.results
|
16
|
+
assert_equal expected_crawl_results, results
|
17
|
+
end
|
18
|
+
|
19
|
+
def expected_crawl_results
|
20
|
+
{
|
21
|
+
"domain" => "www.digitalocean.com",
|
22
|
+
"paths" => {
|
23
|
+
"/company/careers/" => {
|
24
|
+
"asset_dependencies" => [],
|
25
|
+
"links_to" => [],
|
26
|
+
"linked_to_from" => ["/features/", "/pricing/", "/"]
|
27
|
+
},
|
28
|
+
"/" => {
|
29
|
+
"asset_dependencies" => ["//use.typekit.net/wix0mlm.js", "/assets/css/style.css", "/assets/images/cover_create.jpg", "/assets/images/techcrunch.png"],
|
30
|
+
"links_to" => ["/company/careers/", "/pricing/", "/"],
|
31
|
+
"linked_to_from" => ["/features/", "/pricing/"]
|
32
|
+
},
|
33
|
+
"/pricing/" => {
|
34
|
+
"asset_dependencies" => ["/assets/css/style.css", "/assets/images/mashable.png"],
|
35
|
+
"links_to" => ["/company/careers/", "/features/", "/"],
|
36
|
+
"linked_to_from" => ["/features/", "/"]
|
37
|
+
},
|
38
|
+
"/features/" => {
|
39
|
+
"asset_dependencies" => ["/assets/images/venturebeat.png"],
|
40
|
+
"links_to" => ["/company/careers/", "/pricing/", "/"],
|
41
|
+
"linked_to_from" => ["/pricing/"]
|
42
|
+
}
|
43
|
+
}
|
44
|
+
}
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class DocumentTest < MiniTest::Test
|
4
|
+
|
5
|
+
def test_document_parsing
|
6
|
+
document = Crawler::Document.new('https://www.digitalocean.com/')
|
7
|
+
|
8
|
+
# Test link discovery
|
9
|
+
['http://techcrunch.com/article/', 'https://cloud.digitalocean.com/login', '/company/careers/'].each do |link|
|
10
|
+
assert_includes document.links, link
|
11
|
+
end
|
12
|
+
|
13
|
+
# Test domain specific link discovery
|
14
|
+
assert_includes document.domain_specific_paths, '/company/careers/'
|
15
|
+
|
16
|
+
['https://twitter.com/digitalocean', 'https://status.digitalocean.com/', '#'].each do |link|
|
17
|
+
refute_includes document.domain_specific_paths, link
|
18
|
+
end
|
19
|
+
|
20
|
+
# Test asset discovery
|
21
|
+
assert_includes document.static_assets, '/assets/images/techcrunch.png'
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'crawler/formatting'
|
3
|
+
|
4
|
+
class FormattingTest < MiniTest::Test
|
5
|
+
include Crawler::Formatting
|
6
|
+
|
7
|
+
def test_normalize_path
|
8
|
+
path = '/'
|
9
|
+
assert_equal '/', normalize_path(path)
|
10
|
+
|
11
|
+
path = '/pricing'
|
12
|
+
assert_equal '/pricing/', normalize_path(path)
|
13
|
+
|
14
|
+
path = '/pricing/'
|
15
|
+
assert_equal '/pricing/', normalize_path(path)
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_construct_url
|
19
|
+
url = 'https://www.digitalocean.com'
|
20
|
+
uri = URI.parse(url)
|
21
|
+
assert_equal url, construct_url(uri)
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class HttpTest < MiniTest::Test
|
4
|
+
include Crawler::Http
|
5
|
+
|
6
|
+
def test_request
|
7
|
+
domain = 'https://www.digitalocean.com'
|
8
|
+
response = request domain
|
9
|
+
assert_equal 'SSD Cloud Server, VPS Server, Simple Cloud Hosting | DigitalOcean', Nokogiri::HTML(response).css('title').text
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_request_follows_redirect
|
13
|
+
domain = 'https://www.digitalocean.com/redirect'
|
14
|
+
response = request domain
|
15
|
+
assert_equal 'SSD Cloud Server, VPS Server, Simple Cloud Hosting | DigitalOcean', Nokogiri::HTML(response).css('title').text
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_bad_request_is_rescued
|
19
|
+
domain = 'https://www.digitalocean.com/bad_request'
|
20
|
+
response = request domain
|
21
|
+
assert_equal '', response
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class IndexTest < MiniTest::Test
|
4
|
+
include Crawler::Formatting
|
5
|
+
|
6
|
+
def test_initialization
|
7
|
+
domain = 'https://www.digitalocean.com'
|
8
|
+
index = Crawler::Index.new(Addressable::URI.parse(domain))
|
9
|
+
|
10
|
+
expected_results = {
|
11
|
+
'domain' => 'www.digitalocean.com',
|
12
|
+
'paths' => {}
|
13
|
+
}
|
14
|
+
|
15
|
+
assert_equal expected_results, index.results
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_document_consumption
|
19
|
+
domain = "https://www.digitalocean.com"
|
20
|
+
document = Crawler::Document.new(domain)
|
21
|
+
uri = Addressable::URI.parse(domain)
|
22
|
+
index = Crawler::Index.new(uri)
|
23
|
+
index.consume_document uri.path, document
|
24
|
+
|
25
|
+
# Test domain is correct
|
26
|
+
assert_equal 'www.digitalocean.com', index.base_uri.hostname
|
27
|
+
|
28
|
+
# Test that urls / links are properly recorded
|
29
|
+
results = index.results
|
30
|
+
|
31
|
+
["/company/careers/", "/", "/pricing/"].each do |key|
|
32
|
+
assert_includes results['paths'].keys, key
|
33
|
+
end
|
34
|
+
|
35
|
+
paths_to_visit = index.get_paths_to_visit
|
36
|
+
["/company/careers/", "/pricing/"].each do |path|
|
37
|
+
assert_includes paths_to_visit, path
|
38
|
+
end
|
39
|
+
|
40
|
+
paths = index.get_stored_paths
|
41
|
+
["/"].each do |path|
|
42
|
+
assert_includes paths, path
|
43
|
+
end
|
44
|
+
|
45
|
+
# Test that the assets are recorded
|
46
|
+
assets = index.get_path_assets('/')
|
47
|
+
["/assets/images/techcrunch.png", "/assets/images/cover_create.jpg", "/assets/css/style.css"].each do |asset|
|
48
|
+
assert_includes assets, asset
|
49
|
+
end
|
50
|
+
|
51
|
+
# Test the indexed results
|
52
|
+
assert_equal expected_index_results, index.results
|
53
|
+
end
|
54
|
+
|
55
|
+
def expected_index_results
|
56
|
+
{
|
57
|
+
"domain" => "www.digitalocean.com",
|
58
|
+
"paths" => {
|
59
|
+
"/company/careers/" => {
|
60
|
+
"asset_dependencies" => [],
|
61
|
+
"links_to" => [],
|
62
|
+
"linked_to_from" => ["/"]
|
63
|
+
},
|
64
|
+
"/pricing/" => {
|
65
|
+
"asset_dependencies" => [],
|
66
|
+
"links_to" => [],
|
67
|
+
"linked_to_from"=>["/"]
|
68
|
+
},
|
69
|
+
"/" => {
|
70
|
+
"asset_dependencies" => ["//use.typekit.net/wix0mlm.js", "/assets/css/style.css", "/assets/images/cover_create.jpg", "/assets/images/techcrunch.png"],
|
71
|
+
"links_to" => ["/company/careers/", "/pricing/", "/"],
|
72
|
+
"linked_to_from" => []
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
end
|
77
|
+
|
78
|
+
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class StorageTest < MiniTest::Test
|
4
|
+
|
5
|
+
def test_store_path_and_stored_paths
|
6
|
+
path = '/pricing/'
|
7
|
+
index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
|
8
|
+
index.store_path path
|
9
|
+
assert_equal [path], index.get_stored_paths
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_storing_path_assets
|
13
|
+
path = '/pricing/'
|
14
|
+
assets = ['asset1', 'asset2']
|
15
|
+
index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
|
16
|
+
index.store_path_assets(path, assets)
|
17
|
+
|
18
|
+
stored_assets = index.get_path_assets(path)
|
19
|
+
assets.each do |asset|
|
20
|
+
assert_includes stored_assets, asset
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_storing_path_links_to
|
25
|
+
path = '/pricing/'
|
26
|
+
links = ['link1', 'link2']
|
27
|
+
index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
|
28
|
+
index.store_path_links_to(path, links)
|
29
|
+
|
30
|
+
stored_links = index.get_path_links_to(path)
|
31
|
+
|
32
|
+
links.each do |link|
|
33
|
+
assert_includes stored_links, link
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_storing_path_linked_to_from
|
38
|
+
path = '/pricing/'
|
39
|
+
links = ['link1', 'link2']
|
40
|
+
index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
|
41
|
+
index.store_path_linked_to_from(path, links)
|
42
|
+
|
43
|
+
stored_links = index.get_path_linked_to_from(path)
|
44
|
+
links.each do |link|
|
45
|
+
assert_includes stored_links, link
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_storing_paths_visited
|
50
|
+
path = '/pricing/'
|
51
|
+
index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
|
52
|
+
index.store_path_visited(path)
|
53
|
+
assert_equal [path], index.get_paths_visited
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_queuing_paths
|
57
|
+
paths = ['/pricing/', '/features/']
|
58
|
+
index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
|
59
|
+
index.store_paths_to_visit(paths)
|
60
|
+
assert_equal paths.reverse, index.get_paths_to_visit
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_get_domain_data
|
64
|
+
path = '/pricing/'
|
65
|
+
index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
|
66
|
+
index.store_path path
|
67
|
+
|
68
|
+
assets = ['asset1', 'asset2']
|
69
|
+
index.store_path_assets(path, assets)
|
70
|
+
|
71
|
+
links_to = ['link1', 'link2']
|
72
|
+
index.store_path_links_to(path, links_to)
|
73
|
+
|
74
|
+
linked_to_from = ['link3', 'link4']
|
75
|
+
index.store_path_linked_to_from(path, linked_to_from)
|
76
|
+
|
77
|
+
data = index.get_domain_data
|
78
|
+
assert_equal 'www.digitalocean.com', data['domain']
|
79
|
+
|
80
|
+
assets.each do |value|
|
81
|
+
assert_includes data['paths']['/pricing/']['asset_dependencies'], value
|
82
|
+
end
|
83
|
+
|
84
|
+
links_to.each do |value|
|
85
|
+
assert_includes data['paths']['/pricing/']['links_to'], value
|
86
|
+
end
|
87
|
+
|
88
|
+
linked_to_from.each do |value|
|
89
|
+
assert_includes data['paths']['/pricing/']['linked_to_from'], value
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_get_path_data
|
94
|
+
path = '/pricing/'
|
95
|
+
index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
|
96
|
+
index.store_path path
|
97
|
+
|
98
|
+
assets = ['asset1', 'asset2']
|
99
|
+
index.store_path_assets(path, assets)
|
100
|
+
|
101
|
+
links_to = ['link1', 'link2']
|
102
|
+
index.store_path_links_to(path, links_to)
|
103
|
+
|
104
|
+
linked_to_from = ['link3', 'link4']
|
105
|
+
index.store_path_linked_to_from(path, linked_to_from)
|
106
|
+
|
107
|
+
data = index.get_path_data(path)
|
108
|
+
|
109
|
+
assets.each do |value|
|
110
|
+
assert_includes data['asset_dependencies'], value
|
111
|
+
end
|
112
|
+
|
113
|
+
links_to.each do |value|
|
114
|
+
assert_includes data['links_to'], value
|
115
|
+
end
|
116
|
+
|
117
|
+
linked_to_from.each do |value|
|
118
|
+
assert_includes data['linked_to_from'], value
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_clear_stored_data
|
123
|
+
path = '/pricing/'
|
124
|
+
index = Crawler::Index.new(Addressable::URI.parse('https://www.digitalocean.com'))
|
125
|
+
index.store_path path
|
126
|
+
|
127
|
+
assets = ['asset1', 'asset2']
|
128
|
+
index.store_path_assets(path, assets)
|
129
|
+
|
130
|
+
links_to = ['link1', 'link2']
|
131
|
+
index.store_path_links_to(path, links_to)
|
132
|
+
|
133
|
+
linked_to_from = ['link3', 'link4']
|
134
|
+
index.store_path_linked_to_from(path, linked_to_from)
|
135
|
+
|
136
|
+
index.clear_stored_results
|
137
|
+
|
138
|
+
empty_results = {
|
139
|
+
'asset_dependencies' => [],
|
140
|
+
'links_to' => [],
|
141
|
+
'linked_to_from' => []
|
142
|
+
}
|
143
|
+
|
144
|
+
assert_equal empty_results, index.get_path_data(path)
|
145
|
+
end
|
146
|
+
|
147
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class ValidationsTest < MiniTest::Test
|
4
|
+
|
5
|
+
def test_protocol_validation
|
6
|
+
error = assert_raises(Crawler::InvalidProtocolError) { Crawler.new('digitalocean.com') }
|
7
|
+
assert_equal 'Please specify either http or https', error.message
|
8
|
+
end
|
9
|
+
|
10
|
+
end
|
@@ -0,0 +1,256 @@
|
|
1
|
+
module DomainHtml
|
2
|
+
|
3
|
+
# Domain links: '/', '/company/careers/', '/pricing/'
|
4
|
+
# Subdomains: 'cloud.digitalocean.com/...'
|
5
|
+
# External links: 'techcrunch/...'
|
6
|
+
# Assets: /assets/images/techcrunch.png, /assets/images/cover_create.jpg, //use.typekit.net/wix0mlm.js, /assets/css/style.css
|
7
|
+
def root_html
|
8
|
+
'<!doctype html>
|
9
|
+
<html data-placeholder-focus="false">
|
10
|
+
<head>
|
11
|
+
<meta charset="utf-8">
|
12
|
+
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
|
13
|
+
<title>SSD Cloud Server, VPS Server, Simple Cloud Hosting | DigitalOcean</title>
|
14
|
+
<meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
|
15
|
+
<meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
|
16
|
+
<meta name="viewport" content="width=1200, maximum-scale=1" />
|
17
|
+
<link href="/assets/css/style.css" media="screen" rel="stylesheet" type="text/css" />
|
18
|
+
<script type="text/javascript" src="//use.typekit.net/wix0mlm.js"></script>
|
19
|
+
<script type="text/javascript">try{Typekit.load();}catch(e){}</script>
|
20
|
+
</head>
|
21
|
+
<body class="index">
|
22
|
+
<header>
|
23
|
+
<div class="wrapper-full">
|
24
|
+
<a class="logo" href="/">DigitalOcean</a>
|
25
|
+
<a id="hiring" href="/company/careers/">We\'re Hiring!</a>
|
26
|
+
<nav id="button-nav">
|
27
|
+
<ul>
|
28
|
+
<li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
|
29
|
+
<li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
|
30
|
+
</ul>
|
31
|
+
</nav>
|
32
|
+
<nav>
|
33
|
+
<ul>
|
34
|
+
<li><a class="" href="/pricing/">Pricing</a></li>
|
35
|
+
</ul>
|
36
|
+
</nav>
|
37
|
+
</div>
|
38
|
+
</header>
|
39
|
+
<section class="main">
|
40
|
+
<section id="homepage-features" class="homepage-section">
|
41
|
+
<div class="wrapper-full">
|
42
|
+
<h2>A control panel experience made simple.</h2>
|
43
|
+
<div id="browser">
|
44
|
+
<div id="last-vid">
|
45
|
+
<a href="#" class="restart standard-button">Replay</a>
|
46
|
+
</div>
|
47
|
+
<p class="url">https://cloud.digitalocean.com/droplets</p>
|
48
|
+
<video height="100%" class="feature-video" preload="none" poster="/assets/images/cover_create.jpg">
|
49
|
+
<source src="/assets/video/create.mp4" type="video/mp4">
|
50
|
+
<source src="/assets/video/create.webm" type="video/webm">
|
51
|
+
Your browser does not support the video tag.
|
52
|
+
</video>
|
53
|
+
</div>
|
54
|
+
<ul id="feature-controls">
|
55
|
+
<li class="create active" data-video-url="/assets/video/create.webm" data-url-bar="https://cloud.digitalocean.com/droplets">
|
56
|
+
<a href="#">
|
57
|
+
<div class="icon">
|
58
|
+
<span></span>
|
59
|
+
</div>
|
60
|
+
<h3>Create Droplets</h3>
|
61
|
+
<p>Setup and deploy your Droplet\'s configuration in a matter of seconds. </p>
|
62
|
+
</a>
|
63
|
+
</li>
|
64
|
+
</ul>
|
65
|
+
</div>
|
66
|
+
</section>
|
67
|
+
<section id="companies-using">
|
68
|
+
<div class="wrapper-full">
|
69
|
+
<h3>As Seen On</h3>
|
70
|
+
<ul>
|
71
|
+
<li><a href="http://techcrunch.com/article/"><img src="/assets/images/techcrunch.png"></a></li>
|
72
|
+
</ul>
|
73
|
+
</div>
|
74
|
+
</section>
|
75
|
+
<section class="call-to-action">
|
76
|
+
<div class="wrapper-full">
|
77
|
+
<h2>Deploy an SSD cloud server in 55 seconds.</h2>
|
78
|
+
<p>Sign up for DigitalOcean today.</p>
|
79
|
+
<a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
|
80
|
+
GET STARTED NOW <span></span>
|
81
|
+
</a>
|
82
|
+
</div>
|
83
|
+
</section>
|
84
|
+
</section>
|
85
|
+
</body>
|
86
|
+
</html>'
|
87
|
+
end
|
88
|
+
|
89
|
+
# Domain links: '/', '/company/careers/', '/features/'
|
90
|
+
# Subdomains: 'cloud.digitalocean.com/...'
|
91
|
+
# External links: 'mashable/...'
|
92
|
+
# Assets: /assets/css/style.css, /assets/images/mashable.png
|
93
|
+
def pricing_html
|
94
|
+
'<!doctype html>
|
95
|
+
<html data-placeholder-focus="false">
|
96
|
+
<head>
|
97
|
+
<meta charset="utf-8">
|
98
|
+
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
|
99
|
+
<title>Pricing</title>
|
100
|
+
<meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
|
101
|
+
<meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
|
102
|
+
<meta name="viewport" content="width=1200, maximum-scale=1" />
|
103
|
+
<link href="/assets/css/style.css" media="screen" rel="stylesheet" type="text/css" />
|
104
|
+
<script type="text/javascript">try{Typekit.load();}catch(e){}</script>
|
105
|
+
</head>
|
106
|
+
<body class="index">
|
107
|
+
<header>
|
108
|
+
<div class="wrapper-full">
|
109
|
+
<a class="logo" href="/">DigitalOcean</a>
|
110
|
+
<a id="hiring" href="/company/careers/">We\'re Hiring!</a>
|
111
|
+
<nav id="button-nav">
|
112
|
+
<ul>
|
113
|
+
<li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
|
114
|
+
<li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
|
115
|
+
</ul>
|
116
|
+
</nav>
|
117
|
+
<nav>
|
118
|
+
<ul>
|
119
|
+
<li><a class="" href="/features/">Features</a></li>
|
120
|
+
<li><a id="hiring" href="/company/careers/">We\'re Hiring!</a></li>
|
121
|
+
</ul>
|
122
|
+
</nav>
|
123
|
+
</div>
|
124
|
+
</header>
|
125
|
+
<section class="main">
|
126
|
+
<section id="homepage-features" class="homepage-section">
|
127
|
+
<div class="wrapper-full">
|
128
|
+
<h2>A control panel experience made simple.</h2>
|
129
|
+
<div id="browser">
|
130
|
+
<div id="last-vid">
|
131
|
+
<a href="#" class="restart standard-button">Replay</a>
|
132
|
+
</div>
|
133
|
+
<p class="url">https://cloud.digitalocean.com/droplets</p>
|
134
|
+
</div>
|
135
|
+
<ul id="feature-controls">
|
136
|
+
<li class="dns" data-video-url="/assets/video/dns.webm" data-url-bar="https://cloud.digitalocean.com/domains">
|
137
|
+
<a href="#">
|
138
|
+
<div class="icon">
|
139
|
+
<span></span>
|
140
|
+
</div>
|
141
|
+
<h3>DNS Management</h3>
|
142
|
+
<p>Full feature DNS management allows<br>you to easily manage your domains.</p>
|
143
|
+
</a>
|
144
|
+
</li>
|
145
|
+
</ul>
|
146
|
+
</div>
|
147
|
+
</section>
|
148
|
+
<section id="companies-using">
|
149
|
+
<div class="wrapper-full">
|
150
|
+
<h3>As Seen On</h3>
|
151
|
+
<ul>
|
152
|
+
<li><a href="http://mashable.com/article/"><img src="/assets/images/mashable.png"></a></li>
|
153
|
+
</ul>
|
154
|
+
</div>
|
155
|
+
</section>
|
156
|
+
<section class="call-to-action">
|
157
|
+
<div class="wrapper-full">
|
158
|
+
<h2>Deploy an SSD cloud server in 55 seconds.</h2>
|
159
|
+
<p>Sign up for DigitalOcean today.</p>
|
160
|
+
<a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
|
161
|
+
GET STARTED NOW <span></span>
|
162
|
+
</a>
|
163
|
+
</div>
|
164
|
+
</section>
|
165
|
+
</section>
|
166
|
+
</body>
|
167
|
+
</html>'
|
168
|
+
end
|
169
|
+
|
170
|
+
# Domain links: '/', '/company/careers/', '/pricing/'
|
171
|
+
# Subdomains: 'cloud.digitalocean.com/...'
|
172
|
+
# External links: 'venturebeat/...'
|
173
|
+
# Assets: /assets/images/venturebeat.png
|
174
|
+
def features_html
|
175
|
+
'<!doctype html>
|
176
|
+
<html data-placeholder-focus="false">
|
177
|
+
<head>
|
178
|
+
<meta charset="utf-8">
|
179
|
+
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">
|
180
|
+
<title>Features</title>
|
181
|
+
<meta name="description" content="Deploy an 512MB RAM and 20GB SSD cloud server in 55 seconds for $5/month. Simple, fast, scalable SSD cloud virtual servers.">
|
182
|
+
<meta name="keywords" content="cloud server, cloud hosting, vps, vps server, vps server hosting, vps hosting, virtual server, virtual private server, ubuntu server, centos server, debian server, linux server, fedora server, linux mint server">
|
183
|
+
<meta name="viewport" content="width=1200, maximum-scale=1" />
|
184
|
+
<script type="text/javascript">try{Typekit.load();}catch(e){}</script>
|
185
|
+
</head>
|
186
|
+
<body class="index">
|
187
|
+
<header>
|
188
|
+
<div class="wrapper-full">
|
189
|
+
<a class="logo" href="/">DigitalOcean</a>
|
190
|
+
<a id="hiring" href="/company/careers/">We\'re Hiring!</a>
|
191
|
+
<nav id="button-nav">
|
192
|
+
<ul>
|
193
|
+
<li><a href="https://cloud.digitalocean.com/login">Log In</a></li>
|
194
|
+
<li class="button-outline"><a href="https://cloud.digitalocean.com/registrations/new">Sign Up</a></li>
|
195
|
+
</ul>
|
196
|
+
</nav>
|
197
|
+
<nav>
|
198
|
+
<ul>
|
199
|
+
<li><a class="" href="/pricing/">Pricing</a></li>
|
200
|
+
</ul>
|
201
|
+
</nav>
|
202
|
+
</div>
|
203
|
+
</header>
|
204
|
+
<section class="main">
|
205
|
+
<section id="homepage-features" class="homepage-section">
|
206
|
+
<div class="wrapper-full">
|
207
|
+
<h2>A control panel experience made simple.</h2>
|
208
|
+
<div id="browser">
|
209
|
+
<div id="last-vid">
|
210
|
+
<a href="#" class="restart standard-button">Replay</a>
|
211
|
+
</div>
|
212
|
+
</div>
|
213
|
+
<ul id="feature-controls">
|
214
|
+
<li class="backups" data-video-url="/assets/video/snapshot.webm" data-url-bar="https://cloud.digitalocean.com/images">
|
215
|
+
<a href="#">
|
216
|
+
<div class="icon">
|
217
|
+
<span></span>
|
218
|
+
</div>
|
219
|
+
<h3>Snapshots</h3>
|
220
|
+
<p>Build a new server from a snapshot<br>you took, saving you configuration time.</p>
|
221
|
+
</a>
|
222
|
+
</li>
|
223
|
+
</ul>
|
224
|
+
</div>
|
225
|
+
</section>
|
226
|
+
<section id="companies-using">
|
227
|
+
<div class="wrapper-full">
|
228
|
+
<h3>As Seen On</h3>
|
229
|
+
<ul>
|
230
|
+
<li><a href="http://venturebeat.com/article/"><img src="/assets/images/venturebeat.png"></a></li>
|
231
|
+
</ul>
|
232
|
+
</div>
|
233
|
+
</section>
|
234
|
+
<section class="call-to-action">
|
235
|
+
<div class="wrapper-full">
|
236
|
+
<h2>Deploy an SSD cloud server in 55 seconds.</h2>
|
237
|
+
<p>Sign up for DigitalOcean today.</p>
|
238
|
+
<a class="standard-button" href="https://cloud.digitalocean.com/registrations/new">
|
239
|
+
GET STARTED NOW <span></span>
|
240
|
+
</a>
|
241
|
+
</div>
|
242
|
+
</section>
|
243
|
+
</section>
|
244
|
+
</body>
|
245
|
+
</html>'
|
246
|
+
end
|
247
|
+
|
248
|
+
def careers_html
|
249
|
+
'<!doctype html>
|
250
|
+
<html data-placeholder-focus="false">
|
251
|
+
<body>
|
252
|
+
No substantial content
|
253
|
+
</body>
|
254
|
+
</html>'
|
255
|
+
end
|
256
|
+
end
|