embed_html 0.3.0 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown CHANGED
@@ -2,6 +2,7 @@ Dependencies
2
2
  ============
3
3
 
4
4
  * Hpricot
5
+ * Typhoeus
5
6
 
6
7
  Install
7
8
  =======
data/Rakefile CHANGED
@@ -3,13 +3,13 @@ require 'rubygems'
3
3
  require 'rake'
4
4
  require 'echoe'
5
5
 
6
- Echoe.new('embed_html', '0.3.0') do |p|
6
+ Echoe.new('embed_html', '0.3.2') do |p|
7
7
  p.description = "Download and embed images in html using base64 data encoding"
8
8
  p.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"
9
9
  p.url = "http://github.com/siuying/embed_html"
10
10
  p.author = "Francis Chong"
11
11
  p.email = "francis@ignition.hk"
12
12
  p.ignore_pattern = ["tmp/*", "script/*", "*.html"]
13
- p.runtime_dependencies = ["hpricot", "mime-types"]
13
+ p.runtime_dependencies = ["hpricot", "typhoeus", "mime-types"]
14
14
  end
15
15
 
data/embed_html.gemspec CHANGED
@@ -1,38 +1,39 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  Gem::Specification.new do |s|
4
- s.name = %q{embed_html}
5
- s.version = "0.3.0"
4
+ s.name = "embed_html"
5
+ s.version = "0.3.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Francis Chong"]
9
- s.date = %q{2010-06-25}
10
- s.default_executable = %q{eurl}
11
- s.description = %q{Download and embed images in html using base64 data encoding}
12
- s.email = %q{francis@ignition.hk}
9
+ s.date = "2012-02-01"
10
+ s.description = "Download and embed images in html using base64 data encoding"
11
+ s.email = "francis@ignition.hk"
13
12
  s.executables = ["eurl"]
14
13
  s.extra_rdoc_files = ["README.markdown", "bin/eurl", "lib/embed_html.rb", "lib/embed_html/embeder.rb"]
15
14
  s.files = ["Manifest", "README.markdown", "Rakefile", "bin/eurl", "lib/embed_html.rb", "lib/embed_html/embeder.rb", "embed_html.gemspec"]
16
- s.homepage = %q{http://github.com/siuying/embed_html}
15
+ s.homepage = "http://github.com/siuying/embed_html"
17
16
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Embed_html", "--main", "README.markdown"]
18
17
  s.require_paths = ["lib"]
19
- s.rubyforge_project = %q{embed_html}
20
- s.rubygems_version = %q{1.3.6}
21
- s.summary = %q{Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding}
18
+ s.rubyforge_project = "embed_html"
19
+ s.rubygems_version = "1.8.10"
20
+ s.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"
22
21
 
23
22
  if s.respond_to? :specification_version then
24
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
25
23
  s.specification_version = 3
26
24
 
27
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
25
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
28
26
  s.add_runtime_dependency(%q<hpricot>, [">= 0"])
27
+ s.add_runtime_dependency(%q<typhoeus>, [">= 0"])
29
28
  s.add_runtime_dependency(%q<mime-types>, [">= 0"])
30
29
  else
31
30
  s.add_dependency(%q<hpricot>, [">= 0"])
31
+ s.add_dependency(%q<typhoeus>, [">= 0"])
32
32
  s.add_dependency(%q<mime-types>, [">= 0"])
33
33
  end
34
34
  else
35
35
  s.add_dependency(%q<hpricot>, [">= 0"])
36
+ s.add_dependency(%q<typhoeus>, [">= 0"])
36
37
  s.add_dependency(%q<mime-types>, [">= 0"])
37
38
  end
38
39
  end
@@ -10,25 +10,30 @@ module EmbedHtml
10
10
  class Embeder
11
11
  MAX_CONCURRENCY = 5
12
12
 
13
- attr_accessor :url
13
+ attr_accessor :url_or_html
14
14
  attr_accessor :logger
15
15
  attr_accessor :concurrency
16
+ attr_accessor :base_dirname
16
17
 
17
- def initialize(url, logger=Logger.new($stdout), concurrency=MAX_CONCURRENCY)
18
+ def initialize(url_or_html, logger=Logger.new($stdout), concurrency=MAX_CONCURRENCY)
18
19
  @logger = logger
19
- @url = url
20
+ @url_or_html = url_or_html
20
21
  @concurrency = concurrency
21
22
  end
22
23
 
23
24
  def process
24
- @logger.info "downloading url: #{@url}"
25
- html = Typhoeus::Request.get(@url.to_s).body
25
+ # @logger.info "downloading url: #{@url_or_html}"
26
+ html = (@url_or_html =~ /$http/) ? Typhoeus::Request.get(@url_or_html.to_s).body : @url_or_html
26
27
  doc = Hpricot(html)
27
28
 
28
29
  hydra = Typhoeus::Hydra.new(:max_concurrency => @concurrency)
29
30
  doc.search("//img").each do |img|
30
31
  begin
31
- hydra.queue create_fetch_file_request(img, 'src')
32
+ if img['src']=~ /^http/
33
+ hydra.queue create_fetch_file_request(img, 'src')
34
+ else
35
+ fetch_file(img, 'src')
36
+ end
32
37
  rescue StandardError => e
33
38
  @logger.error "failed download image: #{img['src']} #{e.inspect}"
34
39
  end
@@ -36,8 +41,10 @@ module EmbedHtml
36
41
 
37
42
  doc.search("//script").each do |script|
38
43
  begin
39
- if script['src']
44
+ if script['src'] and script['src'] =~ /^http/
40
45
  hydra.queue create_fetch_file_request(script, 'src')
46
+ elsif script['src']
47
+ fetch_file(script, 'src')
41
48
  end
42
49
  rescue StandardError => e
43
50
  @logger.error "failed download script: #{script['src']} #{e.inspect}"
@@ -46,7 +53,12 @@ module EmbedHtml
46
53
 
47
54
  doc.search("//link").each do |link|
48
55
  begin
49
- hydra.queue create_fetch_file_request(link, 'href')
56
+ url = link['href']
57
+ if url =~ /^http/
58
+ hydra.queue create_fetch_file_request(link, 'href')
59
+ else
60
+ fetch_file(link, 'href')
61
+ end
50
62
  rescue StandardError => e
51
63
  @logger.error "failed download linked resource: #{link['href']} #{e.inspect}"
52
64
  end
@@ -54,13 +66,13 @@ module EmbedHtml
54
66
 
55
67
  hydra.run
56
68
 
57
- @logger.info "done"
69
+ # @logger.info "done"
58
70
  doc.to_html
59
71
  end
60
72
 
61
73
  def process_local
62
- @logger.info "downloading url: #{@url}"
63
- html = open(@url).read
74
+ # @logger.info "downloading url: #{@url_or_html}"
75
+ html = open(@url_or_html).read
64
76
  doc = Hpricot(html)
65
77
 
66
78
  doc.search("//img").each do |img|
@@ -87,13 +99,13 @@ module EmbedHtml
87
99
  end
88
100
  end
89
101
 
90
- @logger.info "done"
102
+ # @logger.info "done"
91
103
  doc.to_html
92
104
  end
93
105
 
94
106
  private
95
107
  def create_fetch_file_request(element, field)
96
- file_url = URI.join(@url, element.attributes[field])
108
+ file_url = (@url_or_html =~ /^http/) ? URI.join(@url_or_html, element.attributes[field]) : element.attributes[field]
97
109
  @logger.debug "queue download file: #{file_url}"
98
110
 
99
111
  request = Typhoeus::Request.new(file_url.to_s)
@@ -109,8 +121,9 @@ module EmbedHtml
109
121
  end
110
122
 
111
123
  def fetch_file(element, field)
112
- file_url = element.attributes[field]
124
+ file_url = @base_dirname ? "#{@base_dirname.to_s}/#{element.attributes[field]}" : element.attributes[field]
113
125
  @logger.debug "queue download file: #{file_url}"
126
+ return unless File.exists?(file_url)
114
127
 
115
128
  type = MIME::Types.type_for(file_url).first.to_s rescue "application/data"
116
129
  data = open(file_url.to_s).read
metadata CHANGED
@@ -1,58 +1,60 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: embed_html
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 3
8
- - 0
9
- version: 0.3.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.2
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Francis Chong
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2010-06-25 00:00:00 +08:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-02-01 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: hpricot
16
+ requirement: &70320708644640 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
22
23
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- segments:
28
- - 0
29
- version: "0"
24
+ version_requirements: *70320708644640
25
+ - !ruby/object:Gem::Dependency
26
+ name: typhoeus
27
+ requirement: &70320708641840 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
30
33
  type: :runtime
31
- version_requirements: *id001
32
- - !ruby/object:Gem::Dependency
33
- name: mime-types
34
34
  prerelease: false
35
- requirement: &id002 !ruby/object:Gem::Requirement
36
- requirements:
37
- - - ">="
38
- - !ruby/object:Gem::Version
39
- segments:
40
- - 0
41
- version: "0"
35
+ version_requirements: *70320708641840
36
+ - !ruby/object:Gem::Dependency
37
+ name: mime-types
38
+ requirement: &70320708641160 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
42
44
  type: :runtime
43
- version_requirements: *id002
45
+ prerelease: false
46
+ version_requirements: *70320708641160
44
47
  description: Download and embed images in html using base64 data encoding
45
48
  email: francis@ignition.hk
46
- executables:
49
+ executables:
47
50
  - eurl
48
51
  extensions: []
49
-
50
- extra_rdoc_files:
52
+ extra_rdoc_files:
51
53
  - README.markdown
52
54
  - bin/eurl
53
55
  - lib/embed_html.rb
54
56
  - lib/embed_html/embeder.rb
55
- files:
57
+ files:
56
58
  - Manifest
57
59
  - README.markdown
58
60
  - Rakefile
@@ -60,41 +62,35 @@ files:
60
62
  - lib/embed_html.rb
61
63
  - lib/embed_html/embeder.rb
62
64
  - embed_html.gemspec
63
- has_rdoc: true
64
65
  homepage: http://github.com/siuying/embed_html
65
66
  licenses: []
66
-
67
67
  post_install_message:
68
- rdoc_options:
68
+ rdoc_options:
69
69
  - --line-numbers
70
70
  - --inline-source
71
71
  - --title
72
72
  - Embed_html
73
73
  - --main
74
74
  - README.markdown
75
- require_paths:
75
+ require_paths:
76
76
  - lib
77
- required_ruby_version: !ruby/object:Gem::Requirement
78
- requirements:
79
- - - ">="
80
- - !ruby/object:Gem::Version
81
- segments:
82
- - 0
83
- version: "0"
84
- required_rubygems_version: !ruby/object:Gem::Requirement
85
- requirements:
86
- - - ">="
87
- - !ruby/object:Gem::Version
88
- segments:
89
- - 1
90
- - 2
91
- version: "1.2"
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ! '>='
87
+ - !ruby/object:Gem::Version
88
+ version: '1.2'
92
89
  requirements: []
93
-
94
90
  rubyforge_project: embed_html
95
- rubygems_version: 1.3.6
91
+ rubygems_version: 1.8.10
96
92
  signing_key:
97
93
  specification_version: 3
98
- summary: Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding
94
+ summary: Download or process a HTML page, find images there, download them and embed
95
+ it into the HTML using Base64 data encoding
99
96
  test_files: []
100
-