embed_html 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +2 -2
- data/bin/eurl +6 -2
- data/embed_html.gemspec +5 -2
- data/lib/embed_html/embeder.rb +46 -1
- metadata +15 -3
data/Rakefile
CHANGED
@@ -3,13 +3,13 @@ require 'rubygems'
|
|
3
3
|
require 'rake'
|
4
4
|
require 'echoe'
|
5
5
|
|
6
|
-
Echoe.new('embed_html', '0.2.3') do |p|
|
6
|
+
Echoe.new('embed_html', '0.3.0') do |p|
|
7
7
|
p.description = "Download and embed images in html using base64 data encoding"
|
8
8
|
p.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"
|
9
9
|
p.url = "http://github.com/siuying/embed_html"
|
10
10
|
p.author = "Francis Chong"
|
11
11
|
p.email = "francis@ignition.hk"
|
12
12
|
p.ignore_pattern = ["tmp/*", "script/*", "*.html"]
|
13
|
-
p.runtime_dependencies = ["hpricot"]
|
13
|
+
p.runtime_dependencies = ["hpricot", "mime-types"]
|
14
14
|
end
|
15
15
|
|
data/bin/eurl
CHANGED
@@ -8,10 +8,14 @@ if url && file
|
|
8
8
|
log = Logger.new($stdout)
|
9
9
|
log.level = Logger::INFO
|
10
10
|
|
11
|
-
|
11
|
+
if url =~ /^http/
|
12
|
+
html = EmbedHtml::Embeder.new(url, log, concurrency).process
|
13
|
+
else
|
14
|
+
html = EmbedHtml::Embeder.new(url, log, concurrency).process_local
|
15
|
+
end
|
12
16
|
File.open(file, 'w') {|f| f.write(html)}
|
13
17
|
|
14
18
|
else
|
15
|
-
puts "usage: eurl <URL> <OUTPUT_FILE>"
|
19
|
+
puts "usage: eurl <URL-OR-LOCAL_FILE> <OUTPUT_FILE>"
|
16
20
|
|
17
21
|
end
|
data/embed_html.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{embed_html}
|
5
|
-
s.version = "0.2.3"
|
5
|
+
s.version = "0.3.0"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Francis Chong"]
|
9
|
-
s.date = %q{2010-06-
|
9
|
+
s.date = %q{2010-06-25}
|
10
10
|
s.default_executable = %q{eurl}
|
11
11
|
s.description = %q{Download and embed images in html using base64 data encoding}
|
12
12
|
s.email = %q{francis@ignition.hk}
|
@@ -26,10 +26,13 @@ Gem::Specification.new do |s|
|
|
26
26
|
|
27
27
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
28
28
|
s.add_runtime_dependency(%q<hpricot>, [">= 0"])
|
29
|
+
s.add_runtime_dependency(%q<mime-types>, [">= 0"])
|
29
30
|
else
|
30
31
|
s.add_dependency(%q<hpricot>, [">= 0"])
|
32
|
+
s.add_dependency(%q<mime-types>, [">= 0"])
|
31
33
|
end
|
32
34
|
else
|
33
35
|
s.add_dependency(%q<hpricot>, [">= 0"])
|
36
|
+
s.add_dependency(%q<mime-types>, [">= 0"])
|
34
37
|
end
|
35
38
|
end
|
data/lib/embed_html/embeder.rb
CHANGED
@@ -4,6 +4,7 @@ require 'hpricot'
|
|
4
4
|
require 'uri'
|
5
5
|
require 'base64'
|
6
6
|
require 'typhoeus'
|
7
|
+
require 'mime/types'
|
7
8
|
|
8
9
|
module EmbedHtml
|
9
10
|
class Embeder
|
@@ -57,6 +58,39 @@ module EmbedHtml
|
|
57
58
|
doc.to_html
|
58
59
|
end
|
59
60
|
|
61
|
+
def process_local
|
62
|
+
@logger.info "downloading url: #{@url}"
|
63
|
+
html = open(@url).read
|
64
|
+
doc = Hpricot(html)
|
65
|
+
|
66
|
+
doc.search("//img").each do |img|
|
67
|
+
begin
|
68
|
+
fetch_file(img, 'src')
|
69
|
+
rescue StandardError => e
|
70
|
+
@logger.error "failed download image: #{img['src']} #{e.inspect}"
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
doc.search("//script").each do |script|
|
75
|
+
begin
|
76
|
+
fetch_file(script, 'src')
|
77
|
+
rescue StandardError => e
|
78
|
+
@logger.error "failed download script: #{script['src']} #{e.inspect}"
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
doc.search("//link").each do |link|
|
83
|
+
begin
|
84
|
+
fetch_file(link, 'href')
|
85
|
+
rescue StandardError => e
|
86
|
+
@logger.error "failed download linked resource: #{link['href']} #{e.inspect}"
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
@logger.info "done"
|
91
|
+
doc.to_html
|
92
|
+
end
|
93
|
+
|
60
94
|
private
|
61
95
|
def create_fetch_file_request(element, field)
|
62
96
|
file_url = URI.join(@url, element.attributes[field])
|
@@ -73,6 +107,17 @@ module EmbedHtml
|
|
73
107
|
end
|
74
108
|
return request
|
75
109
|
end
|
76
|
-
|
110
|
+
|
111
|
+
def fetch_file(element, field)
|
112
|
+
file_url = element.attributes[field]
|
113
|
+
@logger.debug "queue download file: #{file_url}"
|
114
|
+
|
115
|
+
type = MIME::Types.type_for(file_url).first.to_s rescue "application/data"
|
116
|
+
data = open(file_url.to_s).read
|
117
|
+
if data && type
|
118
|
+
data_b64 = Base64.encode64(data)
|
119
|
+
element.attributes[field] = "data:#{type};base64,#{data_b64}"
|
120
|
+
end
|
121
|
+
end
|
77
122
|
end
|
78
123
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 2
|
8
7
|
- 3
|
9
|
-
|
8
|
+
- 0
|
9
|
+
version: 0.3.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Francis Chong
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-06-
|
17
|
+
date: 2010-06-25 00:00:00 +08:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -29,6 +29,18 @@ dependencies:
|
|
29
29
|
version: "0"
|
30
30
|
type: :runtime
|
31
31
|
version_requirements: *id001
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: mime-types
|
34
|
+
prerelease: false
|
35
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
segments:
|
40
|
+
- 0
|
41
|
+
version: "0"
|
42
|
+
type: :runtime
|
43
|
+
version_requirements: *id002
|
32
44
|
description: Download and embed images in html using base64 data encoding
|
33
45
|
email: francis@ignition.hk
|
34
46
|
executables:
|