embed_html 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require 'rubygems'
3
3
  require 'rake'
4
4
  require 'echoe'
5
5
 
6
- Echoe.new('embed_html', '0.1.0') do |p|
6
+ Echoe.new('embed_html', '0.2.0') do |p|
7
7
  p.description = "Download and embed images in html using base64 data encoding"
8
8
  p.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"
9
9
  p.url = "http://github.com/siuying/embed_html"
data/embed_html.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{embed_html}
5
- s.version = "0.1.0"
5
+ s.version = "0.2.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Francis Chong"]
9
- s.date = %q{2010-05-20}
9
+ s.date = %q{2010-06-22}
10
10
  s.default_executable = %q{eurl}
11
11
  s.description = %q{Download and embed images in html using base64 data encoding}
12
12
  s.email = %q{francis@ignition.hk}
@@ -25,26 +25,50 @@ module EmbedHtml
25
25
  hydra = Typhoeus::Hydra.new(:max_concurrency => MAX_CONCURRENCY)
26
26
  doc.search("//img").each do |img|
27
27
  begin
28
- image_url = URI.join(@url, img.attributes['src'])
29
- @logger.debug "queue download image: #{image_url}"
28
+ hydra.queue create_fetch_file_request(img, 'src')
29
+ rescue StandardError => e
30
+ @logger.error "failed download image: #{img['src']}"
31
+ end
32
+ end
33
+
34
+ doc.search("//script").each do |script|
35
+ begin
36
+ hydra.queue create_fetch_file_request(script, 'src')
37
+ rescue StandardError => e
38
+ @logger.error "failed download script: #{script['src']}"
39
+ end
40
+ end
30
41
 
31
- request = Typhoeus::Request.new(image_url.to_s)
32
- request.on_complete do |response|
33
- data = response.body
34
- type = response.headers_hash["Content-Type"]
35
- if data && type
36
- data_b64 = Base64.encode64(data)
37
- img.attributes['src'] = "data:#{type};base64,#{data_b64}"
38
- end
39
- end
40
- hydra.queue request
42
+ doc.search("//link").each do |link|
43
+ begin
44
+ hydra.queue create_fetch_file_request(link, 'href')
41
45
  rescue StandardError => e
42
- @logger.error "failed downloading image: #{image_url} (#{e.message})"
46
+ @logger.error "failed download linked resource: #{link['href']}"
43
47
  end
44
48
  end
49
+
45
50
  hydra.run
51
+
46
52
  @logger.info "done"
47
53
  doc.to_html
48
54
  end
55
+
56
+ private
57
+ def create_fetch_file_request(element, field)
58
+ file_url = URI.join(@url, element.attributes[field])
59
+ @logger.debug "queue download file: #{file_url}"
60
+
61
+ request = Typhoeus::Request.new(file_url.to_s)
62
+ request.on_complete do |response|
63
+ data = response.body
64
+ type = response.headers_hash["Content-Type"]
65
+ if data && type
66
+ data_b64 = Base64.encode64(data)
67
+ element.attributes[field] = "data:#{type};base64,#{data_b64}"
68
+ end
69
+ end
70
+ return request
71
+ end
72
+
49
73
  end
50
74
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 1
7
+ - 2
8
8
  - 0
9
- version: 0.1.0
9
+ version: 0.2.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Francis Chong
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-05-20 00:00:00 +08:00
17
+ date: 2010-06-22 00:00:00 +08:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency