embed_html 0.1.0 → 0.2.0

Sign up to get free protection for your applications and access to all the features.
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require 'rubygems'
3
3
  require 'rake'
4
4
  require 'echoe'
5
5
 
6
- Echoe.new('embed_html', '0.1.0') do |p|
6
+ Echoe.new('embed_html', '0.2.0') do |p|
7
7
  p.description = "Download and embed images in html using base64 data encoding"
8
8
  p.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"
9
9
  p.url = "http://github.com/siuying/embed_html"
data/embed_html.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{embed_html}
5
- s.version = "0.1.0"
5
+ s.version = "0.2.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Francis Chong"]
9
- s.date = %q{2010-05-20}
9
+ s.date = %q{2010-06-22}
10
10
  s.default_executable = %q{eurl}
11
11
  s.description = %q{Download and embed images in html using base64 data encoding}
12
12
  s.email = %q{francis@ignition.hk}
@@ -25,26 +25,50 @@ module EmbedHtml
25
25
  hydra = Typhoeus::Hydra.new(:max_concurrency => MAX_CONCURRENCY)
26
26
  doc.search("//img").each do |img|
27
27
  begin
28
- image_url = URI.join(@url, img.attributes['src'])
29
- @logger.debug "queue download image: #{image_url}"
28
+ hydra.queue create_fetch_file_request(img, 'src')
29
+ rescue StandardError => e
30
+ @logger.error "failed download image: #{img['src']}"
31
+ end
32
+ end
33
+
34
+ doc.search("//script").each do |script|
35
+ begin
36
+ hydra.queue create_fetch_file_request(script, 'src')
37
+ rescue StandardError => e
38
+ @logger.error "failed download script: #{script['src']}"
39
+ end
40
+ end
30
41
 
31
- request = Typhoeus::Request.new(image_url.to_s)
32
- request.on_complete do |response|
33
- data = response.body
34
- type = response.headers_hash["Content-Type"]
35
- if data && type
36
- data_b64 = Base64.encode64(data)
37
- img.attributes['src'] = "data:#{type};base64,#{data_b64}"
38
- end
39
- end
40
- hydra.queue request
42
+ doc.search("//link").each do |link|
43
+ begin
44
+ hydra.queue create_fetch_file_request(link, 'href')
41
45
  rescue StandardError => e
42
- @logger.error "failed downloading image: #{image_url} (#{e.message})"
46
+ @logger.error "failed download linked resource: #{link['href']}"
43
47
  end
44
48
  end
49
+
45
50
  hydra.run
51
+
46
52
  @logger.info "done"
47
53
  doc.to_html
48
54
  end
55
+
56
+ private
57
+ def create_fetch_file_request(element, field)
58
+ file_url = URI.join(@url, element.attributes[field])
59
+ @logger.debug "queue download file: #{file_url}"
60
+
61
+ request = Typhoeus::Request.new(file_url.to_s)
62
+ request.on_complete do |response|
63
+ data = response.body
64
+ type = response.headers_hash["Content-Type"]
65
+ if data && type
66
+ data_b64 = Base64.encode64(data)
67
+ element.attributes[field] = "data:#{type};base64,#{data_b64}"
68
+ end
69
+ end
70
+ return request
71
+ end
72
+
49
73
  end
50
74
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 1
7
+ - 2
8
8
  - 0
9
- version: 0.1.0
9
+ version: 0.2.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Francis Chong
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-05-20 00:00:00 +08:00
17
+ date: 2010-06-22 00:00:00 +08:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency