staticizer 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +23 -7
- data/lib/staticizer/command.rb +1 -1
- data/lib/staticizer/crawler.rb +17 -2
- data/lib/staticizer/version.rb +1 -1
- metadata +16 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8809f12f85e54642f56afc7fc537c5d20ab64db1
|
4
|
+
data.tar.gz: 12746fd0c57dc04ee204b67d635e0033b21cc96c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 81f7e9035328e62306c3e31abe6a8969e8b876c944af2589cecf1a3e43879469e26374a1fd143fa15b9e370fe6b8f2fc9916459a2c64f6fe4a34fa186f8aceb5
|
7
|
+
data.tar.gz: 44e25794a99861646943913ae3b530657febafa835652b86975dcfc39f2071a643c137df85338dd2a53907f0494daa44f10d028429a4a3b9516cf739db3eb158
|
data/README.md
CHANGED
@@ -9,13 +9,14 @@ website. If the website goes down this backup would be available
|
|
9
9
|
with reduced functionality.
|
10
10
|
|
11
11
|
S3 and Route 53 provide a great way to host a static emergency backup for a website.
|
12
|
-
See this article - http://aws.typepad.com/aws/2013/02/create-a-backup-website-using-route-53-dns-failover-and-s3-website-hosting.html
|
13
|
-
. In our experience it works
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
the
|
12
|
+
See this article - http://aws.typepad.com/aws/2013/02/create-a-backup-website-using-route-53-dns-failover-and-s3-website-hosting.html
|
13
|
+
. In our experience it works well and is incredibly cheap. Our average sized website
|
14
|
+
with a few hundred pages and assets is less than US$1 a month.
|
15
|
+
|
16
|
+
We tried using existing tools httrack/wget to crawl and create a static version
|
17
|
+
of the site to upload to S3, but we found that they did not work well with S3 hosting.
|
18
|
+
We wanted the site uploaded to S3 to respond to the *exact* same URLs (where possible) as
|
19
|
+
the existing site. This way when the site goes down incoming links from Google search
|
19
20
|
results etc. will still work.
|
20
21
|
|
21
22
|
## TODO
|
@@ -87,6 +88,21 @@ This will only crawl urls in the domain squaremill.com
|
|
87
88
|
s = Staticizer::Crawler.new("http://squaremill.com", :output_dir => "/tmp/crawl")
|
88
89
|
s.crawl
|
89
90
|
|
91
|
+
|
92
|
+
### Crawl a website and make all pages contain 'noindex' meta tag
|
93
|
+
|
94
|
+
s = Staticizer::Crawler.new("http://squaremill.com",
|
95
|
+
:output_dir => "/tmp/crawl",
|
96
|
+
:process_body => lambda {|body, uri, opts|
|
97
|
+
# not the best regex, but it will do for our use
|
98
|
+
body = body.gsub(/<meta\s+name=['"]robots[^>]+>/i,'')
|
99
|
+
body = body.gsub(/<head>/i,"<head>\n<meta name='robots' content='noindex'>")
|
100
|
+
body
|
101
|
+
}
|
102
|
+
)
|
103
|
+
s.crawl
|
104
|
+
|
105
|
+
|
90
106
|
### Crawl a website and rewrite all non www urls to www
|
91
107
|
|
92
108
|
s = Staticizer::Crawler.new("http://squaremill.com",
|
data/lib/staticizer/command.rb
CHANGED
data/lib/staticizer/crawler.rb
CHANGED
@@ -32,6 +32,11 @@ module Staticizer
|
|
32
32
|
uri = URI.parse(initial_page)
|
33
33
|
@opts[:valid_domains] ||= [uri.host]
|
34
34
|
end
|
35
|
+
|
36
|
+
if @opts[:process_body]
|
37
|
+
@process_body = @opts[:process_body]
|
38
|
+
end
|
39
|
+
|
35
40
|
add_url(initial_page)
|
36
41
|
end
|
37
42
|
|
@@ -132,6 +137,7 @@ module Staticizer
|
|
132
137
|
end
|
133
138
|
|
134
139
|
body = response.respond_to?(:read_body) ? response.read_body : response
|
140
|
+
body = process_body(body, uri, {})
|
135
141
|
outfile = File.join(current, "/#{filename}")
|
136
142
|
if filename == ""
|
137
143
|
indexfile = File.join(outfile, "/index.html")
|
@@ -158,9 +164,11 @@ module Staticizer
|
|
158
164
|
opts[:content_type] = response['content-type'] rescue "text/html"
|
159
165
|
@log.info "Uploading #{key} to s3 with content type #{opts[:content_type]}"
|
160
166
|
if response.respond_to?(:read_body)
|
161
|
-
|
167
|
+
body = process_body(response.read_body, uri, opts)
|
168
|
+
@s3_bucket.objects[key].write(body, opts)
|
162
169
|
else
|
163
|
-
|
170
|
+
body = process_body(response, uri, opts)
|
171
|
+
@s3_bucket.objects[key].write(body, opts)
|
164
172
|
end
|
165
173
|
end
|
166
174
|
|
@@ -189,6 +197,13 @@ module Staticizer
|
|
189
197
|
save_page(body, url)
|
190
198
|
end
|
191
199
|
|
200
|
+
def process_body(body, uri, opts)
|
201
|
+
if @process_body
|
202
|
+
body = @process_body.call(body, uri, opts)
|
203
|
+
end
|
204
|
+
body
|
205
|
+
end
|
206
|
+
|
192
207
|
# Fetch a URI and save it to disk
|
193
208
|
def process_url(url, info)
|
194
209
|
@http_connections ||= {}
|
data/lib/staticizer/version.rb
CHANGED
metadata
CHANGED
@@ -1,83 +1,83 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: staticizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Conor Hunt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-04-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.3'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: webmock
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: nokogiri
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: aws-sdk
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
description: A tool to create a static version of a website for hosting on S3. Can
|
@@ -89,7 +89,7 @@ executables:
|
|
89
89
|
extensions: []
|
90
90
|
extra_rdoc_files: []
|
91
91
|
files:
|
92
|
-
- .gitignore
|
92
|
+
- ".gitignore"
|
93
93
|
- Gemfile
|
94
94
|
- LICENSE.txt
|
95
95
|
- README.md
|
@@ -112,17 +112,17 @@ require_paths:
|
|
112
112
|
- lib
|
113
113
|
required_ruby_version: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- -
|
115
|
+
- - ">="
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: '0'
|
118
118
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
119
|
requirements:
|
120
|
-
- -
|
120
|
+
- - ">="
|
121
121
|
- !ruby/object:Gem::Version
|
122
122
|
version: '0'
|
123
123
|
requirements: []
|
124
124
|
rubyforge_project:
|
125
|
-
rubygems_version: 2.
|
125
|
+
rubygems_version: 2.4.5
|
126
126
|
signing_key:
|
127
127
|
specification_version: 4
|
128
128
|
summary: A tool to create a static version of a website for hosting on S3.
|