embed_html 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -3,13 +3,13 @@ require 'rubygems'
3
3
  require 'rake'
4
4
  require 'echoe'
5
5
 
6
- Echoe.new('embed_html', '0.2.3') do |p|
6
+ Echoe.new('embed_html', '0.3.0') do |p|
7
7
  p.description = "Download and embed images in html using base64 data encoding"
8
8
  p.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"
9
9
  p.url = "http://github.com/siuying/embed_html"
10
10
  p.author = "Francis Chong"
11
11
  p.email = "francis@ignition.hk"
12
12
  p.ignore_pattern = ["tmp/*", "script/*", "*.html"]
13
- p.runtime_dependencies = ["hpricot"]
13
+ p.runtime_dependencies = ["hpricot", "mime-types"]
14
14
  end
15
15
 
data/bin/eurl CHANGED
@@ -8,10 +8,14 @@ if url && file
8
8
  log = Logger.new($stdout)
9
9
  log.level = Logger::INFO
10
10
 
11
- html = EmbedHtml::Embeder.new(url, log, concurrency).process
11
+ if url =~ /^http/
12
+ html = EmbedHtml::Embeder.new(url, log, concurrency).process
13
+ else
14
+ html = EmbedHtml::Embeder.new(url, log, concurrency).process_local
15
+ end
12
16
  File.open(file, 'w') {|f| f.write(html)}
13
17
 
14
18
  else
15
- puts "usage: eurl <URL> <OUTPUT_FILE>"
19
+ puts "usage: eurl <URL-OR-LOCAL_FILE> <OUTPUT_FILE>"
16
20
 
17
21
  end
data/embed_html.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{embed_html}
5
- s.version = "0.2.3"
5
+ s.version = "0.3.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Francis Chong"]
9
- s.date = %q{2010-06-22}
9
+ s.date = %q{2010-06-25}
10
10
  s.default_executable = %q{eurl}
11
11
  s.description = %q{Download and embed images in html using base64 data encoding}
12
12
  s.email = %q{francis@ignition.hk}
@@ -26,10 +26,13 @@ Gem::Specification.new do |s|
26
26
 
27
27
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
28
28
  s.add_runtime_dependency(%q<hpricot>, [">= 0"])
29
+ s.add_runtime_dependency(%q<mime-types>, [">= 0"])
29
30
  else
30
31
  s.add_dependency(%q<hpricot>, [">= 0"])
32
+ s.add_dependency(%q<mime-types>, [">= 0"])
31
33
  end
32
34
  else
33
35
  s.add_dependency(%q<hpricot>, [">= 0"])
36
+ s.add_dependency(%q<mime-types>, [">= 0"])
34
37
  end
35
38
  end
@@ -4,6 +4,7 @@ require 'hpricot'
4
4
  require 'uri'
5
5
  require 'base64'
6
6
  require 'typhoeus'
7
+ require 'mime/types'
7
8
 
8
9
  module EmbedHtml
9
10
  class Embeder
@@ -57,6 +58,39 @@ module EmbedHtml
57
58
  doc.to_html
58
59
  end
59
60
 
61
# Reads the document at @url, replaces the resources referenced by its
# <img>, <script> and <link> elements with inline data (via #fetch_file),
# and returns the resulting HTML.
#
# A failure on a single resource is logged through @logger and does not
# abort the run; that element is simply left unchanged.
#
# @return [String] the HTML serialised by doc.to_html
def process_local
  @logger.info "downloading url: #{@url}"
  # Block form so the handle is closed as soon as the content is read,
  # instead of lingering until garbage collection.
  html = open(@url) { |io| io.read }
  doc = Hpricot(html)

  # [xpath, attribute holding the reference, label used in the error log]
  targets = [
    ['//img',    'src',  'image'],
    ['//script', 'src',  'script'],
    ['//link',   'href', 'linked resource']
  ]

  targets.each do |xpath, field, label|
    doc.search(xpath).each do |element|
      begin
        fetch_file(element, field)
      rescue StandardError => e
        @logger.error "failed download #{label}: #{element[field]} #{e.inspect}"
      end
    end
  end

  @logger.info "done"
  doc.to_html
end
93
+
60
94
  private
61
95
  def create_fetch_file_request(element, field)
62
96
  file_url = URI.join(@url, element.attributes[field])
@@ -73,6 +107,17 @@ module EmbedHtml
73
107
  end
74
108
  return request
75
109
  end
76
-
110
+
111
# Replaces the +field+ attribute of +element+ with a base64 "data:" URI
# holding the content that the attribute currently points at.
#
# Nothing is done (and nil is returned) when the attribute is missing or
# empty, e.g. an inline <script>, or when it already holds a data: URI.
#
# @param element [#attributes] node whose attributes respond to [] and []=
# @param field [String] attribute name, e.g. 'src' or 'href'
# @return [String, nil] the new attribute value, or nil when skipped
# @raise [ArgumentError] if the reference starts with "|"
# @raise [SystemCallError] if the referenced content cannot be opened
def fetch_file(element, field)
  file_url = element.attributes[field].to_s
  return if file_url.empty? || file_url =~ /\Adata:/i

  # The reference comes from the document being processed and is handed to
  # Kernel#open, which spawns a subprocess for strings starting with "|".
  raise ArgumentError, "refusing to open pipe command: #{file_url}" if file_url.start_with?('|')

  @logger.debug "queue download file: #{file_url}"

  fallback_type = 'application/data'
  type = begin
    # type_for returns an empty list for unknown extensions; nil.to_s would
    # silently yield "" and produce a "data:;base64," URI.
    mime = MIME::Types.type_for(file_url).first
    mime ? mime.to_s : fallback_type
  rescue StandardError
    # Type detection is best effort; never let it block the embedding.
    fallback_type
  end

  # Binary mode keeps the bytes untouched; block form closes the handle.
  data = open(file_url, 'rb') { |io| io.read }
  # encode64 wraps its output with newlines, which do not belong inside an
  # attribute value, so they are stripped.
  data_b64 = Base64.encode64(data).delete("\n")
  element.attributes[field] = "data:#{type};base64,#{data_b64}"
end
77
122
  end
78
123
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 2
8
7
  - 3
9
- version: 0.2.3
8
+ - 0
9
+ version: 0.3.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Francis Chong
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-06-22 00:00:00 +08:00
17
+ date: 2010-06-25 00:00:00 +08:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -29,6 +29,18 @@ dependencies:
29
29
  version: "0"
30
30
  type: :runtime
31
31
  version_requirements: *id001
32
+ - !ruby/object:Gem::Dependency
33
+ name: mime-types
34
+ prerelease: false
35
+ requirement: &id002 !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ segments:
40
+ - 0
41
+ version: "0"
42
+ type: :runtime
43
+ version_requirements: *id002
32
44
  description: Download and embed images in html using base64 data encoding
33
45
  email: francis@ignition.hk
34
46
  executables: