embed_html 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require 'rubygems'
3
3
  require 'rake'
4
4
  require 'echoe'
5
5
 
6
- Echoe.new('embed_html', '0.2.2') do |p|
6
+ Echoe.new('embed_html', '0.2.3') do |p|
7
7
  p.description = "Download and embed images in html using base64 data encoding"
8
8
  p.summary = "Download or process a HTML page, find images there, download them and embed it into the HTML using Base64 data encoding"
9
9
  p.url = "http://github.com/siuying/embed_html"
data/bin/eurl CHANGED
@@ -2,12 +2,13 @@ require 'embed_html'
2
2
 
3
3
  url = ARGV[0]
4
4
  file = ARGV[1]
5
+ concurrency = ARGV[2].nil? ? 5 : ARGV[2].to_i
5
6
 
6
7
  if url && file
7
8
  log = Logger.new($stdout)
8
9
  log.level = Logger::INFO
9
10
 
10
- html = EmbedHtml::Embeder.new(url, log).process
11
+ html = EmbedHtml::Embeder.new(url, log, concurrency).process
11
12
  File.open(file, 'w') {|f| f.write(html)}
12
13
 
13
14
  else
data/embed_html.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{embed_html}
5
- s.version = "0.2.2"
5
+ s.version = "0.2.3"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Francis Chong"]
@@ -11,10 +11,12 @@ module EmbedHtml
11
11
 
12
12
  attr_accessor :url
13
13
  attr_accessor :logger
14
+ attr_accessor :concurrency
14
15
 
15
- def initialize(url, logger=Logger.new($stdout))
16
+ def initialize(url, logger=Logger.new($stdout), concurrency=MAX_CONCURRENCY)
16
17
  @logger = logger
17
18
  @url = url
19
+ @concurrency = concurrency
18
20
  end
19
21
 
20
22
  def process
@@ -22,20 +24,22 @@ module EmbedHtml
22
24
  html = Typhoeus::Request.get(@url.to_s).body
23
25
  doc = Hpricot(html)
24
26
 
25
- hydra = Typhoeus::Hydra.new(:max_concurrency => MAX_CONCURRENCY)
27
+ hydra = Typhoeus::Hydra.new(:max_concurrency => @concurrency)
26
28
  doc.search("//img").each do |img|
27
29
  begin
28
30
  hydra.queue create_fetch_file_request(img, 'src')
29
31
  rescue StandardError => e
30
- @logger.error "failed download image: #{img['src']}"
32
+ @logger.error "failed download image: #{img['src']} #{e.inspect}"
31
33
  end
32
34
  end
33
35
 
34
36
  doc.search("//script").each do |script|
35
37
  begin
36
- hydra.queue create_fetch_file_request(script, 'src')
38
+ if script['src']
39
+ hydra.queue create_fetch_file_request(script, 'src')
40
+ end
37
41
  rescue StandardError => e
38
- @logger.error "failed download script: #{script['src']}"
42
+ @logger.error "failed download script: #{script['src']} #{e.inspect}"
39
43
  end
40
44
  end
41
45
 
@@ -43,7 +47,7 @@ module EmbedHtml
43
47
  begin
44
48
  hydra.queue create_fetch_file_request(link, 'href')
45
49
  rescue StandardError => e
46
- @logger.error "failed download linked resource: #{link['href']}"
50
+ @logger.error "failed download linked resource: #{link['href']} #{e.inspect}"
47
51
  end
48
52
  end
49
53
 
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- - 2
9
- version: 0.2.2
8
+ - 3
9
+ version: 0.2.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Francis Chong