staticfy 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/Gemfile +4 -0
- data/Rakefile +44 -0
- data/bin/staticfy +27 -0
- data/lib/staticfy.rb +33 -0
- data/lib/staticfy/anemone_hacks.rb +48 -0
- data/lib/staticfy/crawler.rb +44 -0
- data/lib/staticfy/handlers.rb +63 -0
- data/lib/staticfy/handlers/base.rb +50 -0
- data/lib/staticfy/handlers/css.rb +58 -0
- data/lib/staticfy/handlers/html.rb +89 -0
- data/lib/staticfy/handlers/raw.rb +26 -0
- data/lib/staticfy/version.rb +23 -0
- data/staticfy.gemspec +25 -0
- data/test/staticfy/handlers_test.rb +65 -0
- data/test/test_helper.rb +23 -0
- metadata +92 -0
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
require 'bundler'
|
22
|
+
Bundler::GemHelper.install_tasks

# Make the gem's own lib/ directory requirable from every task below.
$LOAD_PATH << File.expand_path("../lib", __FILE__)

task :default => :test

desc "Run tests"
task :test do
  # The test files require "test_helper", which lives directly under test/.
  $LOAD_PATH << File.expand_path("../test", __FILE__)

  Dir["./test/**/*_test.rb"].each { |test_file| require test_file }
end

desc "Test run"
task :runtest do
  require 'rubygems'
  require 'anemone'
  require 'staticfy'

  # Manual smoke run: crawl a fixed site into ./output next to this Rakefile.
  output_dir = File.expand_path("../output", __FILE__)
  Staticfy.crawl("http://www.disqueagua.com/", :output => output_dir)
end
|
data/bin/staticfy
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright (c) 2011 Wilker Lúcio
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
+
# of this software and associated documentation files (the "Software"), to deal
|
6
|
+
# in the Software without restriction, including without limitation the rights
|
7
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
# copies of the Software, and to permit persons to whom the Software is
|
9
|
+
# furnished to do so, subject to the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be included in
|
12
|
+
# all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
20
|
+
# THE SOFTWARE.
|
21
|
+
|
22
|
+
require 'rubygems'
|
23
|
+
require 'staticfy'
|
24
|
+
|
25
|
+
# URL of the site to crawl, taken from the first command-line argument.
url = ARGV[0]

# Fail early with a usage message instead of handing nil (or a blank string)
# to the crawler when the script is run without an argument.
abort "Usage: #{File.basename($0)} URL" if url.nil? || url.strip.empty?

# Pages are written to the "output" directory, relative to the current directory.
Staticfy.crawl(url, :output => "output")
|
data/lib/staticfy.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
require 'anemone'
|
22
|
+
require 'staticfy/anemone_hacks'
|
23
|
+
|
24
|
+
module Staticfy
  autoload :Crawler,  "staticfy/crawler"
  autoload :Handlers, "staticfy/handlers"

  # Builds a Crawler for +url+ with +options+, hands it to the optional block
  # so the caller can adjust it, then starts the crawl.
  #
  # Returns whatever Crawler#crawl returns.
  def self.crawl(url, options = {}, &block)
    Crawler.new(url, options).tap { |crawler| block.call(crawler) if block }.crawl
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
# Reopens Anemone::Page so that link discovery and the body to be saved are
# delegated to a Staticfy handler picked from the page URL's file extension.
class Anemone::Page
  # Keep the stock constructor reachable before it is replaced below.
  alias_method :old_initialize, :initialize

  # Runs the original constructor, then attaches the handler for this page.
  def initialize(url, params = {})
    old_initialize(url, params)
    @parser = factory_parser
  end

  # Links reported by the handler; computed on first use and cached in @links.
  def links
    @links ||= @parser.fetch_links
  end

  # Page body as produced by the handler's local_body.
  def local_body
    @parser.local_body
  end

  # Local file name for this page, derived from its URL.
  def local_uri
    Staticfy::Handlers.local_uri(url)
  end

  private

  # Instantiates the handler class registered for the URL path's extension.
  def factory_parser
    handler_class = Staticfy::Handlers.factory(File.extname(url.path))
    handler_class.new(self)
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
require 'fileutils'

module Staticfy
  # Crawls a site with Anemone and writes every visited page into the
  # directory given by the :output option, using each page's local_uri as the
  # file name and its local_body as the content.
  class Crawler
    attr_accessor :url

    # url     - start URL handed to Anemone.crawl.
    # options - Hash; :output is the directory the pages are written to.
    def initialize(url, options = {})
      @url = url
      @options = options
    end

    # Runs the crawl, saving each page and printing "Saved <path>" for it.
    def crawl
      output_dir = @options[:output]

      # File.open below does not create missing directories, so make sure the
      # target exists before the first page is written.
      FileUtils.mkdir_p(output_dir)

      Anemone.crawl(url) do |anemone|
        anemone.on_every_page do |page|
          local_path = File.join(output_dir, page.local_uri)

          # Binary mode: the body is written as-is, whatever the content type.
          File.open(local_path, "wb") { |file| file << page.local_body }

          puts "Saved #{local_path}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
module Staticfy
  # Namespace for the per-content-type handlers, plus the helpers that pick a
  # handler class and compute the local file name for a URI.
  module Handlers
    autoload :Base, "staticfy/handlers/base"
    autoload :HTML, "staticfy/handlers/html"
    autoload :CSS,  "staticfy/handlers/css"
    autoload :Raw,  "staticfy/handlers/raw"

    class << self
      # Extensions that are kept as-is in local file names; anything else gets
      # ".html" appended.
      KNOW_EXTENSIONS = [".htm", ".js", ".css", ".gif", ".jpg", ".jpeg", ".png", ".swf"].freeze

      # Returns the handler class for a file extension (leading dot included,
      # compared case-insensitively): CSS for ".css", HTML for everything else.
      #
      # NOTE(review): handlers_test expects Raw for an unknown extension such
      # as ".other", but this method has always fallen back to HTML. The
      # fallback is kept unchanged here; confirm which side is intended.
      def factory(ext)
        ext_table = { ".css" => CSS }

        ext_table[ext.to_s.downcase] || HTML
      end

      # Builds the flat local file name for +uri+ (an object responding to
      # #path and #query, e.g. a URI).
      #
      # * the leading character of the path is dropped; an empty result
      #   becomes "index"
      # * a query string is appended after "?", followed by the extension
      # * known extensions (any letter case) are preserved, everything else
      #   gets ".html"
      # * the characters * ? \ / = & are replaced by "_"
      #
      # Returns a String.
      def local_uri(uri)
        invalid_chars = /[*?\\\/=&]/

        # to_s guards against a nil path so File.extname cannot raise.
        path = uri.path.to_s
        current_ext = File.extname(path)
        ext = KNOW_EXTENSIONS.include?(current_ext.downcase) ? current_ext : ".html"

        # ""[1..-1] is nil (e.g. "http://example.com" has an empty path), so
        # normalise to a String before testing for emptiness.
        local = path[1..-1].to_s
        local = "index" if local.empty?

        if uri.query
          local = "#{local}?#{uri.query}#{ext}"
        elsif current_ext != ext
          local = "#{local}#{ext}"
        end

        local.gsub(invalid_chars, "_")
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
module Staticfy
  module Handlers
    # Common behaviour for all handlers. A handler wraps a page object and
    # forwards every method it does not define itself to that page.
    class Base
      # page - the object being handled; must answer the calls that the
      #        handler forwards to it (body, url, ...).
      def initialize(page)
        @page = page
      end

      # Local file name for the wrapped page's URL.
      def local_uri
        Handlers.local_uri(url)
      end

      # Links to follow from this page. The base handler reports none.
      def fetch_links
        []
      end

      # Content to save for this page. The base handler saves the body as-is.
      def local_body
        body
      end

      # Forwards unknown calls to the page when it publicly responds to them,
      # passing along the arguments and the caller's block; otherwise falls
      # through to the default NoMethodError.
      def method_missing(name, *args, &block)
        if @page.respond_to?(name)
          @page.send(name, *args, &block)
        else
          super
        end
      end

      # Keeps respond_to? consistent with the forwarding done in method_missing.
      def respond_to_missing?(name, include_private = false)
        @page.respond_to?(name) || super
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
module Staticfy
  module Handlers
    # Handler for stylesheets: finds url(...) references, reports the
    # in-domain ones as links and rewrites them to their local file names.
    class CSS < Base
      # Matches url(...) with optional single or double quotes; capture 1 is
      # the referenced location.
      URL_PATTERN = /url\s*\(['"]?(.+?)['"]?\)/

      # Returns the absolute URIs of all in-domain url(...) references.
      # References that cannot be parsed or resolved are skipped.
      def fetch_links
        css = body
        return [] if css.nil? # defensive: nothing to scan without a body

        links = []
        css.scan(URL_PATTERN) do |(reference)|
          abs = in_domain_uri(reference)
          links << abs if abs
        end
        links
      end

      # Returns the stylesheet with every in-domain url(...) replaced by
      # url('<local file name>'). Other references, including ones that
      # cannot be parsed, are left exactly as written.
      def local_body
        css = body
        return css if css.nil?

        css.gsub(URL_PATTERN) do |match|
          abs = in_domain_uri(Regexp.last_match(1))
          abs ? "url('#{Staticfy::Handlers.local_uri(abs)}')" : match
        end
      end

      private

      # Resolves +reference+ against the page and returns the absolute URI
      # when it is in the crawled domain, nil otherwise. URI errors raised
      # while parsing or resolving are treated as "not a usable link", in
      # line with how the HTML handler skips bad links.
      def in_domain_uri(reference)
        return nil if reference.nil? || reference.empty?

        abs = to_absolute(URI(reference))
        abs if abs && in_domain?(abs)
      rescue URI::Error
        nil
      end
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
module Staticfy
  module Handlers
    # Handler for HTML pages: collects in-domain references from a fixed set
    # of tag/attribute pairs and rewrites them to their local file names.
    class HTML < Base
      # [tag, attribute] pairs that are inspected, in scan order:
      # anchors, scripts, <link> elements, images and image inputs.
      LINK_ATTRIBUTES = [
        ["a",      "href"],
        ["script", "src"],
        ["link",   "href"],
        ["img",    "src"],
        ["input",  "src"]
      ].freeze

      # Returns the unique in-domain absolute URIs referenced by the page,
      # or [] when the page has no parsed document.
      def fetch_links
        return [] unless doc

        found = []
        LINK_ATTRIBUTES.each do |tag_name, attr_name|
          attribute_iterator(doc, tag_name, attr_name) { |abs, _node| found << abs }
        end
        found.uniq
      end

      # Returns the page markup with in-domain references rewritten to local
      # file names. Without a parsed document the body is returned untouched.
      def local_body
        return body unless doc

        # Work on a copy so the page's own document is not modified.
        html = doc.dup
        LINK_ATTRIBUTES.each do |tag_name, attr_name|
          update_links(html, tag_name, attr_name)
        end
        html.to_s
      end

      # Yields (absolute_uri, node) for every +tag+ element in +document+
      # whose +attr+ is non-empty, resolvable and in the crawled domain.
      def attribute_iterator(document, tag, attr)
        document.search("//#{tag}[@#{attr}]").each do |node|
          value = node[attr]
          next if value.nil? || value.empty?

          # Any error while parsing or resolving the value skips this node.
          abs = begin
            to_absolute(URI(value))
          rescue StandardError
            next
          end

          yield abs, node if in_domain?(abs)
        end
      end

      # Replaces +attr+ on every matching in-domain +tag+ element with the
      # local file name of the URI it points to.
      def update_links(document, tag, attr)
        attribute_iterator(document, tag, attr) do |abs, node|
          node[attr] = Staticfy::Handlers.local_uri(abs)
        end
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
module Staticfy
  module Handlers
    # Handler that adds nothing of its own: every method is inherited from
    # Base.
    class Raw < Base
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
module Staticfy
  # Version of the gem; read by staticfy.gemspec.
  VERSION = "0.0.1"
end
|
data/staticfy.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "staticfy/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
  s.name        = "staticfy"
  s.version     = Staticfy::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Wilker Lúcio"]
  s.email       = ["wilkerlucio@gmail.com"]
  s.homepage    = "https://www.github.com/wilkerlucio/staticfy"
  s.summary     = %q{Turn online sites into html static ones.}
  s.description = %q{This gem provides a simple tool to make a full online website into a static one. This can be useful for companies that needs to take system off from a client, but want to keep it online in static manner.}

  # The source files carry the MIT license text in their headers.
  s.license     = "MIT"

  s.rubyforge_project = "staticfy"

  # File lists come from git, so the gem must be built from a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Staticfy currently patches Anemone internals, so an exact Anemone version
  # is required.
  s.add_dependency("anemone", "0.6.1")

  s.add_development_dependency("mocha")
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
require "test_helper"
|
22
|
+
|
23
|
+
# Unit tests for Staticfy::Handlers: handler selection by extension and the
# URL-to-local-file-name mapping.
class StaticfyHandlersTest < Test::Unit::TestCase
  def test_factory_handler_html
    assert_equal Staticfy::Handlers::HTML, Staticfy::Handlers.factory(".html")
  end

  def test_factory_handler_css
    assert_equal Staticfy::Handlers::CSS, Staticfy::Handlers.factory(".css")
  end

  def test_factory_handler_other
    assert_equal Staticfy::Handlers::Raw, Staticfy::Handlers.factory(".other")
  end

  # Path separators are replaced by underscores.
  def test_local_uri_replace_invalid_chars
    assert_equal "some_place_here.html",
                 local_uri_for("http://example.com/some/place/here.html")
  end

  # The query string becomes part of the name and ".html" is appended.
  def test_local_uri_append_querystring_and_extension
    assert_equal "some_place_here_with_query_and_data.html",
                 local_uri_for("http://example.com/some/place/here?with=query&and=data")
  end

  # The site root maps to index.html.
  def test_local_uri_use_index_if_root
    assert_equal "index.html", local_uri_for("http://example.com/")
  end

  # Extensions outside the known list get ".html" appended.
  def test_local_uri_add_html_for_unknow_extensions
    assert_equal "page.php.html", local_uri_for("http://example.com/page.php")
  end

  # A known extension is repeated after the query data.
  def test_local_uri_preserve_know_extension_in_with_query_data
    assert_equal "some_place_here.png_with_query_and_data.png",
                 local_uri_for("http://example.com/some/place/here.png?with=query&and=data")
  end

  private

  # Parses +address+ and returns the local file name Staticfy assigns to it.
  def local_uri_for(address)
    Staticfy::Handlers.local_uri(URI.parse(address))
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Copyright (c) 2011 Wilker Lúcio
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
require 'test/unit'
|
22
|
+
require 'mocha'
|
23
|
+
require 'staticfy'
|
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: staticfy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.1
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- "Wilker L\xC3\xBAcio"
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-06-27 00:00:00 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: anemone
|
17
|
+
prerelease: false
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - "="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.6.1
|
24
|
+
type: :runtime
|
25
|
+
version_requirements: *id001
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: mocha
|
28
|
+
prerelease: false
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: "0"
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id002
|
37
|
+
description: This gem provides a simple tool to make a full online website into a static one. This can be useful for companies that needs to take system off from a client, but want to keep it online in static manner.
|
38
|
+
email:
|
39
|
+
- wilkerlucio@gmail.com
|
40
|
+
executables:
|
41
|
+
- staticfy
|
42
|
+
extensions: []
|
43
|
+
|
44
|
+
extra_rdoc_files: []
|
45
|
+
|
46
|
+
files:
|
47
|
+
- .gitignore
|
48
|
+
- Gemfile
|
49
|
+
- Rakefile
|
50
|
+
- bin/staticfy
|
51
|
+
- lib/staticfy.rb
|
52
|
+
- lib/staticfy/anemone_hacks.rb
|
53
|
+
- lib/staticfy/crawler.rb
|
54
|
+
- lib/staticfy/handlers.rb
|
55
|
+
- lib/staticfy/handlers/base.rb
|
56
|
+
- lib/staticfy/handlers/css.rb
|
57
|
+
- lib/staticfy/handlers/html.rb
|
58
|
+
- lib/staticfy/handlers/raw.rb
|
59
|
+
- lib/staticfy/version.rb
|
60
|
+
- staticfy.gemspec
|
61
|
+
- test/staticfy/handlers_test.rb
|
62
|
+
- test/test_helper.rb
|
63
|
+
homepage: https://www.github.com/wilkerlucio/staticfy
|
64
|
+
licenses: []
|
65
|
+
|
66
|
+
post_install_message:
|
67
|
+
rdoc_options: []
|
68
|
+
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: "0"
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: "0"
|
83
|
+
requirements: []
|
84
|
+
|
85
|
+
rubyforge_project: staticfy
|
86
|
+
rubygems_version: 1.7.2
|
87
|
+
signing_key:
|
88
|
+
specification_version: 3
|
89
|
+
summary: Turn online sites into html static ones.
|
90
|
+
test_files:
|
91
|
+
- test/staticfy/handlers_test.rb
|
92
|
+
- test/test_helper.rb
|