embed_html 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +2 -2
- data/bin/eurl +6 -2
- data/embed_html.gemspec +5 -2
- data/lib/embed_html/embeder.rb +46 -1
- metadata +15 -3
data/Rakefile
CHANGED
@@ -3,13 +3,13 @@ require 'rubygems'
|
|
3
3
|
require 'rake'
|
4
4
|
require 'echoe'
|
5
5
|
|
6
|
-
Echoe.new('embed_html', '0.2.3') do |p|
|
6
|
+
Echoe.new('embed_html', '0.3.0') do |p|
|
7
7
|
p.description = "Download and embed images in html using base64 data encoding"
|
8
8
|
p.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"
|
9
9
|
p.url = "http://github.com/siuying/embed_html"
|
10
10
|
p.author = "Francis Chong"
|
11
11
|
p.email = "francis@ignition.hk"
|
12
12
|
p.ignore_pattern = ["tmp/*", "script/*", "*.html"]
|
13
|
-
p.runtime_dependencies = ["hpricot"]
|
13
|
+
p.runtime_dependencies = ["hpricot", "mime-types"]
|
14
14
|
end
|
15
15
|
|
data/bin/eurl
CHANGED
@@ -8,10 +8,14 @@ if url && file
|
|
8
8
|
log = Logger.new($stdout)
|
9
9
|
log.level = Logger::INFO
|
10
10
|
|
11
|
-
|
11
|
+
if url =~ /^http/
|
12
|
+
html = EmbedHtml::Embeder.new(url, log, concurrency).process
|
13
|
+
else
|
14
|
+
html = EmbedHtml::Embeder.new(url, log, concurrency).process_local
|
15
|
+
end
|
12
16
|
File.open(file, 'w') {|f| f.write(html)}
|
13
17
|
|
14
18
|
else
|
15
|
-
puts "usage: eurl <URL> <OUTPUT_FILE>"
|
19
|
+
puts "usage: eurl <URL-OR-LOCAL_FILE> <OUTPUT_FILE>"
|
16
20
|
|
17
21
|
end
|
data/embed_html.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{embed_html}
|
5
|
-
s.version = "0.2.3"
|
5
|
+
s.version = "0.3.0"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Francis Chong"]
|
9
|
-
s.date = %q{2010-06-
|
9
|
+
s.date = %q{2010-06-25}
|
10
10
|
s.default_executable = %q{eurl}
|
11
11
|
s.description = %q{Download and embed images in html using base64 data encoding}
|
12
12
|
s.email = %q{francis@ignition.hk}
|
@@ -26,10 +26,13 @@ Gem::Specification.new do |s|
|
|
26
26
|
|
27
27
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
28
28
|
s.add_runtime_dependency(%q<hpricot>, [">= 0"])
|
29
|
+
s.add_runtime_dependency(%q<mime-types>, [">= 0"])
|
29
30
|
else
|
30
31
|
s.add_dependency(%q<hpricot>, [">= 0"])
|
32
|
+
s.add_dependency(%q<mime-types>, [">= 0"])
|
31
33
|
end
|
32
34
|
else
|
33
35
|
s.add_dependency(%q<hpricot>, [">= 0"])
|
36
|
+
s.add_dependency(%q<mime-types>, [">= 0"])
|
34
37
|
end
|
35
38
|
end
|
data/lib/embed_html/embeder.rb
CHANGED
@@ -4,6 +4,7 @@ require 'hpricot'
|
|
4
4
|
require 'uri'
|
5
5
|
require 'base64'
|
6
6
|
require 'typhoeus'
|
7
|
+
require 'mime/types'
|
7
8
|
|
8
9
|
module EmbedHtml
|
9
10
|
class Embeder
|
@@ -57,6 +58,39 @@ module EmbedHtml
|
|
57
58
|
doc.to_html
|
58
59
|
end
|
59
60
|
|
61
|
+
# Reads the HTML document at the local path @url and embeds every image,
# script and linked resource it references as a base64 data: URI.
#
# Returns the rewritten document as an HTML string.
#
# A resource that cannot be embedded is logged at error level and skipped, so
# one broken reference never aborts the whole document.
def process_local
  @logger.info "downloading url: #{@url}"

  # File.read closes the handle and, unlike Kernel#open, never treats a
  # leading "|" in the path as a command to run.
  html = File.read(@url)
  doc = Hpricot(html)

  # [selector, attribute holding the path, label used in the error message]
  targets = [
    ['//img',    'src',  'image'],
    ['//script', 'src',  'script'],
    ['//link',   'href', 'linked resource']
  ]

  # Relative src/href values are relative to the document, not to wherever the
  # command was started, so resolve them from the document's directory.
  Dir.chdir(File.dirname(File.expand_path(@url))) do
    targets.each do |selector, attribute, label|
      doc.search(selector).each do |element|
        # Inline <script> blocks and similar have nothing to embed.
        next if element[attribute].to_s.empty?

        begin
          fetch_file(element, attribute)
        rescue StandardError => e
          @logger.error "failed download #{label}: #{element[attribute]} #{e.inspect}"
        end
      end
    end
  end

  @logger.info "done"
  doc.to_html
end
|
93
|
+
|
60
94
|
private
|
61
95
|
def create_fetch_file_request(element, field)
|
62
96
|
file_url = URI.join(@url, element.attributes[field])
|
@@ -73,6 +107,17 @@ module EmbedHtml
|
|
73
107
|
end
|
74
108
|
return request
|
75
109
|
end
|
76
|
-
|
110
|
+
|
111
|
+
# Replaces element.attributes[field] with a base64 data: URI built from the
# local file that attribute points to.
#
# element - object whose #attributes responds to [] and []=
# field   - name of the attribute holding the file path ('src', 'href', ...)
#
# Does nothing when the attribute is missing or empty. Raises a SystemCallError
# (e.g. Errno::ENOENT) when the file cannot be read; callers are expected to
# rescue and log.
def fetch_file(element, field)
  file_url = element.attributes[field].to_s
  return if file_url.empty?

  @logger.debug "queue download file: #{file_url}"

  # type_for returns an empty list for unknown extensions; nil.to_s would then
  # yield "" and produce "data:;base64,...", so fall back explicitly.
  type = begin
    MIME::Types.type_for(file_url).first.to_s
  rescue StandardError
    ''
  end
  type = 'application/data' if type.empty?

  # File.open (not Kernel#open) so an attribute value such as "|cmd" is never
  # executed; binary mode keeps image bytes intact; the block closes the handle.
  data = File.open(file_url, 'rb') { |f| f.read }

  # encode64 wraps its output every 60 characters; a data: URI must not
  # contain those line breaks.
  data_b64 = Base64.encode64(data).delete("\n")
  element.attributes[field] = "data:#{type};base64,#{data_b64}"
end
|
77
122
|
end
|
78
123
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 2
|
8
7
|
- 3
|
9
|
-
|
8
|
+
- 0
|
9
|
+
version: 0.3.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Francis Chong
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-06-
|
17
|
+
date: 2010-06-25 00:00:00 +08:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -29,6 +29,18 @@ dependencies:
|
|
29
29
|
version: "0"
|
30
30
|
type: :runtime
|
31
31
|
version_requirements: *id001
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: mime-types
|
34
|
+
prerelease: false
|
35
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
segments:
|
40
|
+
- 0
|
41
|
+
version: "0"
|
42
|
+
type: :runtime
|
43
|
+
version_requirements: *id002
|
32
44
|
description: Download and embed images in html using base64 data encoding
|
33
45
|
email: francis@ignition.hk
|
34
46
|
executables:
|