embed_html 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/embed_html.gemspec +2 -2
- data/lib/embed_html/embeder.rb +37 -13
- metadata +3 -3
data/Rakefile
CHANGED
@@ -3,7 +3,7 @@ require 'rubygems'
|
|
3
3
|
require 'rake'
|
4
4
|
require 'echoe'
|
5
5
|
|
6
|
-
Echoe.new('embed_html', '0.1.0') do |p|
|
6
|
+
Echoe.new('embed_html', '0.2.0') do |p|
|
7
7
|
p.description = "Download and embed images in html using base64 data encoding"
|
8
8
|
p.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"
|
9
9
|
p.url = "http://github.com/siuying/embed_html"
|
data/embed_html.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{embed_html}
|
5
|
-
s.version = "0.1.0"
|
5
|
+
s.version = "0.2.0"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Francis Chong"]
|
9
|
-
s.date = %q{2010-
|
9
|
+
s.date = %q{2010-06-22}
|
10
10
|
s.default_executable = %q{eurl}
|
11
11
|
s.description = %q{Download and embed images in html using base64 data encoding}
|
12
12
|
s.email = %q{francis@ignition.hk}
|
data/lib/embed_html/embeder.rb
CHANGED
@@ -25,26 +25,50 @@ module EmbedHtml
|
|
25
25
|
hydra = Typhoeus::Hydra.new(:max_concurrency => MAX_CONCURRENCY)
|
26
26
|
doc.search("//img").each do |img|
|
27
27
|
begin
|
28
|
-
|
29
|
-
|
28
|
+
hydra.queue create_fetch_file_request(img, 'src')
|
29
|
+
rescue StandardError => e
|
30
|
+
@logger.error "failed download image: #{img['src']}"
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
doc.search("//script").each do |script|
|
35
|
+
begin
|
36
|
+
hydra.queue create_fetch_file_request(script, 'src')
|
37
|
+
rescue StandardError => e
|
38
|
+
@logger.error "failed download script: #{script['src']}"
|
39
|
+
end
|
40
|
+
end
|
30
41
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
type = response.headers_hash["Content-Type"]
|
35
|
-
if data && type
|
36
|
-
data_b64 = Base64.encode64(data)
|
37
|
-
img.attributes['src'] = "data:#{type};base64,#{data_b64}"
|
38
|
-
end
|
39
|
-
end
|
40
|
-
hydra.queue request
|
42
|
+
doc.search("//link").each do |link|
|
43
|
+
begin
|
44
|
+
hydra.queue create_fetch_file_request(link, 'href')
|
41
45
|
rescue StandardError => e
|
42
|
-
@logger.error "failed
|
46
|
+
@logger.error "failed download linked resource: #{link['href']}"
|
43
47
|
end
|
44
48
|
end
|
49
|
+
|
45
50
|
hydra.run
|
51
|
+
|
46
52
|
@logger.info "done"
|
47
53
|
doc.to_html
|
48
54
|
end
|
55
|
+
|
56
|
+
private
|
57
|
+
def create_fetch_file_request(element, field)
|
58
|
+
file_url = URI.join(@url, element.attributes[field])
|
59
|
+
@logger.debug "queue download file: #{file_url}"
|
60
|
+
|
61
|
+
request = Typhoeus::Request.new(file_url.to_s)
|
62
|
+
request.on_complete do |response|
|
63
|
+
data = response.body
|
64
|
+
type = response.headers_hash["Content-Type"]
|
65
|
+
if data && type
|
66
|
+
data_b64 = Base64.encode64(data)
|
67
|
+
element.attributes[field] = "data:#{type};base64,#{data_b64}"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
return request
|
71
|
+
end
|
72
|
+
|
49
73
|
end
|
50
74
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 1
|
7
|
+
- 2
|
8
8
|
- 0
|
9
|
-
version: 0.1.0
|
9
|
+
version: 0.2.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Francis Chong
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-06-22 00:00:00 +08:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|