image_downloader 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -32,15 +32,15 @@ After installation, you can use the following code as an example:
32
32
  # download all images found anywhere on the page (by regexp: everything that looks like an image URL)
33
33
  downloader.parse(:any_looks_like_image => true)
34
34
 
35
- #####
35
+ ##### or
36
36
  # download images from all elements where images are usually placed (<img...>, <a...>, ...)
37
37
  downloader.parse()
38
38
 
39
- #####
39
+ ##### or
40
40
  # download images from exact places in the page
41
41
  downloader.parse(:collect => {:link_icon => true})
42
42
 
43
- #####
43
+ ##### or
44
44
  # download images by regexp
45
45
  downloader.parse(:regexp => /[^'"]+\.jpg/i)
46
46
 
@@ -58,7 +58,7 @@ For "parse" method available following options
58
58
  }
59
59
 
60
60
  # find by regexp
61
- :regexp => /['"]([^'"]+\.jpg)[^'"]*['"]/i)
61
+ :regexp => /['"]([^'"]+\.jpg)[^'"]*['"]/i) # for ruby 1.8 (in 1.9, () is not allowed for the scan method)
62
62
  :regexp => /[^'"]+\.jpg/i # the same, but shorter
63
63
  :regexp => /[^'"]+\.css/ # other files can also be downloaded
64
64
 
@@ -30,23 +30,33 @@ module ImageDownloader
30
30
  end
31
31
 
32
32
  def get_content_raw
33
- @content = open(self.argument_url, 'User-Agent' => self.user_agent).read
33
+ @content = self.open_url.read
34
34
  @content.gsub!(/[\n\r\t]+/,' ')
35
35
  end
36
36
 
37
37
  def get_images_raw(path,h={})
38
- self.content.scan(/['"]+[^'"]+\.(?:#{Images::IMAGE_EXTENSIONS.join('|')})['"]+/).map{|src|
39
- src.gsub!(/['"]/,'')
40
- self.push_to_images(path,src)
38
+ self.content.scan(/['"]+([^'"]+\.(?:#{Images::IMAGE_EXTENSIONS.join('|')}))[^'"]*['"]+/i) {|src|
39
+ self.push_to_images(path,Parser.clear(src[0]))
40
+ }
41
+ self.content.scan(/(?:href|src)=([^\s'">]+\.(?:#{Images::IMAGE_EXTENSIONS.join('|')}))[^\s'">]*[>\s]+/i) {|src|
42
+ self.push_to_images(path,Parser.clear(src[0]))
41
43
  }
42
44
  end
43
45
 
46
+ def self.clear(str)
47
+ if str =~ /url/i
48
+ str.gsub!(/^.*?url\(/,'')
49
+ str.gsub!(/\)/,'')
50
+ end
51
+ str
52
+ end
53
+
44
54
  def get_images_regexp(path,regexp)
45
55
  self.content.scan(regexp) {|src| self.push_to_images(path,src.to_s)}
46
56
  end
47
57
 
48
58
  def get_content
49
- @content = Nokogiri::HTML(open(self.argument_url, 'User-Agent' => self.user_agent))
59
+ @content = Nokogiri::HTML(self.open_url)
50
60
  end
51
61
 
52
62
  def get_images(path,h={})
@@ -112,5 +122,9 @@ module ImageDownloader
112
122
  alias all_image_places all_collect_from_methods
113
123
  end
114
124
 
125
+ def open_url
126
+ open(self.argument_url, 'User-Agent' => self.user_agent)
127
+ end
128
+
115
129
  end
116
130
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: image_downloader
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 2
10
- version: 0.2.2
9
+ - 3
10
+ version: 0.2.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Malykh Oleg
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-07-21 00:00:00 +04:00
18
+ date: 2011-07-25 00:00:00 +04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency