embed_html 0.2.2 → 0.2.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require 'rubygems'
3
3
  require 'rake'
4
4
  require 'echoe'
5
5
 
6
- Echoe.new('embed_html', '0.2.2') do |p|
6
+ Echoe.new('embed_html', '0.2.3') do |p|
7
7
  p.description = "Download and embed images in html using base64 data encoding"
8
8
  p.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"
9
9
  p.url = "http://github.com/siuying/embed_html"
data/bin/eurl CHANGED
@@ -2,12 +2,13 @@ require 'embed_html'
2
2
 
3
3
  url = ARGV[0]
4
4
  file = ARGV[1]
5
+ concurrency = ARGV[2].nil? ? 5 : ARGV[2].to_i
5
6
 
6
7
  if url && file
7
8
  log = Logger.new($stdout)
8
9
  log.level = Logger::INFO
9
10
 
10
- html = EmbedHtml::Embeder.new(url, log).process
11
+ html = EmbedHtml::Embeder.new(url, log, concurrency).process
11
12
  File.open(file, 'w') {|f| f.write(html)}
12
13
 
13
14
  else
data/embed_html.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{embed_html}
5
- s.version = "0.2.2"
5
+ s.version = "0.2.3"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Francis Chong"]
@@ -11,10 +11,12 @@ module EmbedHtml
11
11
 
12
12
  attr_accessor :url
13
13
  attr_accessor :logger
14
+ attr_accessor :concurrency
14
15
 
15
- def initialize(url, logger=Logger.new($stdout))
16
+ def initialize(url, logger=Logger.new($stdout), concurrency=MAX_CONCURRENCY)
16
17
  @logger = logger
17
18
  @url = url
19
+ @concurrency = concurrency
18
20
  end
19
21
 
20
22
  def process
@@ -22,20 +24,22 @@ module EmbedHtml
22
24
  html = Typhoeus::Request.get(@url.to_s).body
23
25
  doc = Hpricot(html)
24
26
 
25
- hydra = Typhoeus::Hydra.new(:max_concurrency => MAX_CONCURRENCY)
27
+ hydra = Typhoeus::Hydra.new(:max_concurrency => @concurrency)
26
28
  doc.search("//img").each do |img|
27
29
  begin
28
30
  hydra.queue create_fetch_file_request(img, 'src')
29
31
  rescue StandardError => e
30
- @logger.error "failed download image: #{img['src']}"
32
+ @logger.error "failed download image: #{img['src']} #{e.inspect}"
31
33
  end
32
34
  end
33
35
 
34
36
  doc.search("//script").each do |script|
35
37
  begin
36
- hydra.queue create_fetch_file_request(script, 'src')
38
+ if script['src']
39
+ hydra.queue create_fetch_file_request(script, 'src')
40
+ end
37
41
  rescue StandardError => e
38
- @logger.error "failed download script: #{script['src']}"
42
+ @logger.error "failed download script: #{script['src']} #{e.inspect}"
39
43
  end
40
44
  end
41
45
 
@@ -43,7 +47,7 @@ module EmbedHtml
43
47
  begin
44
48
  hydra.queue create_fetch_file_request(link, 'href')
45
49
  rescue StandardError => e
46
- @logger.error "failed download linked resource: #{link['href']}"
50
+ @logger.error "failed download linked resource: #{link['href']} #{e.inspect}"
47
51
  end
48
52
  end
49
53
 
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- - 2
9
- version: 0.2.2
8
+ - 3
9
+ version: 0.2.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Francis Chong