embed_html 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest ADDED
@@ -0,0 +1,6 @@
1
+ Manifest
2
+ README.markdown
3
+ Rakefile
4
+ bin/eurl
5
+ lib/embed_html.rb
6
+ lib/embed_html/embeder.rb
data/README.markdown ADDED
@@ -0,0 +1,28 @@
1
+ Dependencies
2
+ ============
3
+
4
+ * Hpricot
5
+ * Typhoeus
6
+
7
+ Install
8
+ =======
9
+
10
+ > gem install embed_html
11
+
12
+ Usage
13
+ =======
14
+
15
+ > eurl _<input-url>_ _<output-file>_
16
+
17
+ Example:
18
+
19
+ > eurl "http://en.wikipedia.org/wiki/Wiki" wiki.html
20
+
21
+ Output:
22
+
23
+ > I, [2010-05-20T15:56:58.315769 #26393] INFO -- : downloading url: http://en.wikipedia.org/wiki/Wiki
24
+
25
+ > I, [2010-05-20T15:57:02.033068 #26393] INFO -- : done
26
+
27
+ The file wiki.html then contains the specified page, with all of its images embedded.
28
+
data/Rakefile ADDED
@@ -0,0 +1,15 @@
1
# Rakefile
#
# Configures the embed_html 0.1.0 gem through Echoe.
require 'rubygems'
require 'rake'
require 'echoe'

Echoe.new('embed_html', '0.1.0') do |gem|
  # Who maintains the gem and where it lives.
  gem.author = "Francis Chong"
  gem.email = "francis@ignition.hk"
  gem.url = "http://github.com/siuying/embed_html"

  # Short and long descriptions.
  gem.description = "Download and embed images in html using base64 data encoding"
  gem.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"

  # Path patterns handed to Echoe's ignore_pattern setting.
  gem.ignore_pattern = %w[tmp/* script/* *.html]

  # Gems the library requires when it runs.
  gem.runtime_dependencies = %w[hpricot typhoeus]
end
15
+
data/bin/eurl ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
# eurl - command-line front end for EmbedHtml::Embeder.
#
# Usage: eurl <URL> <OUTPUT_FILE>
#
# Processes URL with EmbedHtml::Embeder and writes the resulting HTML to
# OUTPUT_FILE. Progress is logged to standard output at INFO level.
require 'logger'
require 'embed_html'

url, file = ARGV

# Report misuse on stderr with a non-zero exit status so that calling
# scripts can tell a failed invocation from a successful one.
abort "usage: eurl <URL> <OUTPUT_FILE>" unless url && file

log = Logger.new($stdout)
log.level = Logger::INFO

html = EmbedHtml::Embeder.new(url, log).process
File.open(file, 'w') { |f| f.write(html) }
@@ -0,0 +1,38 @@
1
# -*- encoding: utf-8 -*-

# Gem specification for embed_html 0.1.0.
#
# Setters that are probed with respond_to? are only called when the running
# RubyGems provides them, so the file still loads where a setter is absent.
Gem::Specification.new do |s|
  s.name = %q{embed_html}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Francis Chong"]
  s.date = %q{2010-05-20}
  s.default_executable = %q{eurl} if s.respond_to? :default_executable=
  s.description = %q{Download and embed images in html using base64 data encoding}
  s.email = %q{francis@ignition.hk}
  s.executables = ["eurl"]
  s.extra_rdoc_files = ["README.markdown", "bin/eurl", "lib/embed_html.rb", "lib/embed_html/embeder.rb"]
  s.files = ["Manifest", "README.markdown", "Rakefile", "bin/eurl", "lib/embed_html.rb", "lib/embed_html/embeder.rb", "embed_html.gemspec"]
  s.homepage = %q{http://github.com/siuying/embed_html}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Embed_html", "--main", "README.markdown"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{embed_html} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding}

  s.specification_version = 3 if s.respond_to? :specification_version=

  # Both gems are runtime dependencies with no version constraint. Prefer
  # add_runtime_dependency and fall back to add_dependency when it is missing,
  # instead of repeating the list once per RubyGems version branch.
  %w[hpricot typhoeus].each do |gem_name|
    if s.respond_to? :add_runtime_dependency
      s.add_runtime_dependency(gem_name, [">= 0"])
    else
      s.add_dependency(gem_name, [">= 0"])
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'logger'
2
+ require 'open-uri'
3
+ require 'hpricot'
4
+ require 'uri'
5
+ require 'base64'
6
+ require 'typhoeus'
7
+
8
+ module EmbedHtml
9
+ class Embeder
10
+ MAX_CONCURRENCY = 5
11
+
12
+ attr_accessor :url
13
+ attr_accessor :logger
14
+
15
+ def initialize(url, logger=Logger.new($stdout))
16
+ @logger = logger
17
+ @url = url
18
+ end
19
+
20
+ def process
21
+ @logger.info "downloading url: #{@url}"
22
+ html = Typhoeus::Request.get(@url.to_s).body
23
+ doc = Hpricot(html)
24
+
25
+ hydra = Typhoeus::Hydra.new(:max_concurrency => MAX_CONCURRENCY)
26
+ doc.search("//img").each do |img|
27
+ begin
28
+ image_url = URI.join(@url, img.attributes['src'])
29
+ @logger.debug "queue download image: #{image_url}"
30
+
31
+ request = Typhoeus::Request.new(image_url.to_s)
32
+ request.on_complete do |response|
33
+ data = response.body
34
+ type = response.headers_hash["Content-Type"]
35
+ if data && type
36
+ data_b64 = Base64.encode64(data)
37
+ img.attributes['src'] = "data:#{type};base64,#{data_b64}"
38
+ end
39
+ end
40
+ hydra.queue request
41
+ rescue StandardError => e
42
+ @logger.error "failed downloading image: #{image_url} (#{e.message})"
43
+ end
44
+ end
45
+ hydra.run
46
+ @logger.info "done"
47
+ doc.to_html
48
+ end
49
+ end
50
+ end
data/lib/embed_html.rb ADDED
@@ -0,0 +1,4 @@
1
# Entry point of the embed_html library.
#
# Puts this file's directory at the front of the load path (unless it is
# already listed) and then loads the Embeder implementation.
lib_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

require 'embed_html/embeder'
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embed_html
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Francis Chong
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-05-20 00:00:00 +08:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: hpricot
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ version: "0"
30
+ type: :runtime
31
+ version_requirements: *id001
32
+ - !ruby/object:Gem::Dependency
33
+ name: typhoeus
34
+ prerelease: false
35
+ requirement: &id002 !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ segments:
40
+ - 0
41
+ version: "0"
42
+ type: :runtime
43
+ version_requirements: *id002
44
+ description: Download and embed images in html using base64 data encoding
45
+ email: francis@ignition.hk
46
+ executables:
47
+ - eurl
48
+ extensions: []
49
+
50
+ extra_rdoc_files:
51
+ - README.markdown
52
+ - bin/eurl
53
+ - lib/embed_html.rb
54
+ - lib/embed_html/embeder.rb
55
+ files:
56
+ - Manifest
57
+ - README.markdown
58
+ - Rakefile
59
+ - bin/eurl
60
+ - lib/embed_html.rb
61
+ - lib/embed_html/embeder.rb
62
+ - embed_html.gemspec
63
+ has_rdoc: true
64
+ homepage: http://github.com/siuying/embed_html
65
+ licenses: []
66
+
67
+ post_install_message:
68
+ rdoc_options:
69
+ - --line-numbers
70
+ - --inline-source
71
+ - --title
72
+ - Embed_html
73
+ - --main
74
+ - README.markdown
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ segments:
82
+ - 0
83
+ version: "0"
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ segments:
89
+ - 1
90
+ - 2
91
+ version: "1.2"
92
+ requirements: []
93
+
94
+ rubyforge_project: embed_html
95
+ rubygems_version: 1.3.6
96
+ signing_key:
97
+ specification_version: 3
98
+ summary: Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding
99
+ test_files: []
100
+