embed_html 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -3,13 +3,13 @@ require 'rubygems'
3
3
  require 'rake'
4
4
  require 'echoe'
5
5
 
6
- Echoe.new('embed_html', '0.2.3') do |p|
6
+ Echoe.new('embed_html', '0.3.0') do |p|
7
7
  p.description = "Download and embed images in html using base64 data encoding"
8
8
  p.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"
9
9
  p.url = "http://github.com/siuying/embed_html"
10
10
  p.author = "Francis Chong"
11
11
  p.email = "francis@ignition.hk"
12
12
  p.ignore_pattern = ["tmp/*", "script/*", "*.html"]
13
- p.runtime_dependencies = ["hpricot"]
13
+ p.runtime_dependencies = ["hpricot", "mime-types"]
14
14
  end
15
15
 
data/bin/eurl CHANGED
@@ -8,10 +8,14 @@ if url && file
8
8
  log = Logger.new($stdout)
9
9
  log.level = Logger::INFO
10
10
 
11
- html = EmbedHtml::Embeder.new(url, log, concurrency).process
11
+ if url =~ /^http/
12
+ html = EmbedHtml::Embeder.new(url, log, concurrency).process
13
+ else
14
+ html = EmbedHtml::Embeder.new(url, log, concurrency).process_local
15
+ end
12
16
  File.open(file, 'w') {|f| f.write(html)}
13
17
 
14
18
  else
15
- puts "usage: eurl <URL> <OUTPUT_FILE>"
19
+ puts "usage: eurl <URL-OR-LOCAL_FILE> <OUTPUT_FILE>"
16
20
 
17
21
  end
data/embed_html.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{embed_html}
5
- s.version = "0.2.3"
5
+ s.version = "0.3.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Francis Chong"]
9
- s.date = %q{2010-06-22}
9
+ s.date = %q{2010-06-25}
10
10
  s.default_executable = %q{eurl}
11
11
  s.description = %q{Download and embed images in html using base64 data encoding}
12
12
  s.email = %q{francis@ignition.hk}
@@ -26,10 +26,13 @@ Gem::Specification.new do |s|
26
26
 
27
27
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
28
28
  s.add_runtime_dependency(%q<hpricot>, [">= 0"])
29
+ s.add_runtime_dependency(%q<mime-types>, [">= 0"])
29
30
  else
30
31
  s.add_dependency(%q<hpricot>, [">= 0"])
32
+ s.add_dependency(%q<mime-types>, [">= 0"])
31
33
  end
32
34
  else
33
35
  s.add_dependency(%q<hpricot>, [">= 0"])
36
+ s.add_dependency(%q<mime-types>, [">= 0"])
34
37
  end
35
38
  end
@@ -4,6 +4,7 @@ require 'hpricot'
4
4
  require 'uri'
5
5
  require 'base64'
6
6
  require 'typhoeus'
7
+ require 'mime/types'
7
8
 
8
9
  module EmbedHtml
9
10
  class Embeder
@@ -57,6 +58,39 @@ module EmbedHtml
57
58
  doc.to_html
58
59
  end
59
60
 
61
+ def process_local
62
+ @logger.info "downloading url: #{@url}"
63
+ html = open(@url).read
64
+ doc = Hpricot(html)
65
+
66
+ doc.search("//img").each do |img|
67
+ begin
68
+ fetch_file(img, 'src')
69
+ rescue StandardError => e
70
+ @logger.error "failed download image: #{img['src']} #{e.inspect}"
71
+ end
72
+ end
73
+
74
+ doc.search("//script").each do |script|
75
+ begin
76
+ fetch_file(script, 'src')
77
+ rescue StandardError => e
78
+ @logger.error "failed download script: #{script['src']} #{e.inspect}"
79
+ end
80
+ end
81
+
82
+ doc.search("//link").each do |link|
83
+ begin
84
+ fetch_file(link, 'href')
85
+ rescue StandardError => e
86
+ @logger.error "failed download linked resource: #{link['href']} #{e.inspect}"
87
+ end
88
+ end
89
+
90
+ @logger.info "done"
91
+ doc.to_html
92
+ end
93
+
60
94
  private
61
95
  def create_fetch_file_request(element, field)
62
96
  file_url = URI.join(@url, element.attributes[field])
@@ -73,6 +107,17 @@ module EmbedHtml
73
107
  end
74
108
  return request
75
109
  end
76
-
110
+
111
+ def fetch_file(element, field)
112
+ file_url = element.attributes[field]
113
+ @logger.debug "queue download file: #{file_url}"
114
+
115
+ type = MIME::Types.type_for(file_url).first.to_s rescue "application/data"
116
+ data = open(file_url.to_s).read
117
+ if data && type
118
+ data_b64 = Base64.encode64(data)
119
+ element.attributes[field] = "data:#{type};base64,#{data_b64}"
120
+ end
121
+ end
77
122
  end
78
123
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 2
8
7
  - 3
9
- version: 0.2.3
8
+ - 0
9
+ version: 0.3.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Francis Chong
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-06-22 00:00:00 +08:00
17
+ date: 2010-06-25 00:00:00 +08:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -29,6 +29,18 @@ dependencies:
29
29
  version: "0"
30
30
  type: :runtime
31
31
  version_requirements: *id001
32
+ - !ruby/object:Gem::Dependency
33
+ name: mime-types
34
+ prerelease: false
35
+ requirement: &id002 !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ segments:
40
+ - 0
41
+ version: "0"
42
+ type: :runtime
43
+ version_requirements: *id002
32
44
  description: Download and embed images in html using base64 data encoding
33
45
  email: francis@ignition.hk
34
46
  executables: