image_downloader 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,15 +32,15 @@ After installation, you can use the following code as an example:
32
32
  # download all images found anywhere on the page (by regexp: anything that looks like an image URL)
33
33
  downloader.parse(:any_looks_like_image => true)
34
34
 
35
- #####
35
+ ##### or
36
36
  # download images from all elements where images are usually placed (<img...>, <a...>, ...)
37
37
  downloader.parse()
38
38
 
39
- #####
39
+ ##### or
40
40
  # download images from exact places in the page
41
41
  downloader.parse(:collect => {:link_icon => true})
42
42
 
43
- #####
43
+ ##### or
44
44
  # download images by regexp
45
45
  downloader.parse(:regexp => /[^'"]+\.jpg/i)
46
46
 
@@ -58,7 +58,7 @@ For "parse" method available following options
58
58
  }
59
59
 
60
60
  # find by regexp
61
- :regexp => /['"]([^'"]+\.jpg)[^'"]*['"]/i)
61
+ :regexp => /['"]([^'"]+\.jpg)[^'"]*['"]/i) # Ruby 1.8 only (in Ruby 1.9, capture groups "()" are not allowed with the scan method)
62
62
  :regexp => /[^'"]+\.jpg/i # the same, but shorter
63
63
  :regexp => /[^'"]+\.css/ # other files can also be downloaded
64
64
 
@@ -30,23 +30,33 @@ module ImageDownloader
30
30
  end
31
31
 
32
32
  def get_content_raw
33
- @content = open(self.argument_url, 'User-Agent' => self.user_agent).read
33
+ @content = self.open_url.read
34
34
  @content.gsub!(/[\n\r\t]+/,' ')
35
35
  end
36
36
 
37
37
  def get_images_raw(path,h={})
38
- self.content.scan(/['"]+[^'"]+\.(?:#{Images::IMAGE_EXTENSIONS.join('|')})['"]+/).map{|src|
39
- src.gsub!(/['"]/,'')
40
- self.push_to_images(path,src)
38
+ self.content.scan(/['"]+([^'"]+\.(?:#{Images::IMAGE_EXTENSIONS.join('|')}))[^'"]*['"]+/i) {|src|
39
+ self.push_to_images(path,Parser.clear(src[0]))
40
+ }
41
+ self.content.scan(/(?:href|src)=([^\s'">]+\.(?:#{Images::IMAGE_EXTENSIONS.join('|')}))[^\s'">]*[>\s]+/i) {|src|
42
+ self.push_to_images(path,Parser.clear(src[0]))
41
43
  }
42
44
  end
43
45
 
46
+ def self.clear(str)
47
+ if str =~ /url/i
48
+ str.gsub!(/^.*?url\(/,'')
49
+ str.gsub!(/\)/,'')
50
+ end
51
+ str
52
+ end
53
+
44
54
  def get_images_regexp(path,regexp)
45
55
  self.content.scan(regexp) {|src| self.push_to_images(path,src.to_s)}
46
56
  end
47
57
 
48
58
  def get_content
49
- @content = Nokogiri::HTML(open(self.argument_url, 'User-Agent' => self.user_agent))
59
+ @content = Nokogiri::HTML(self.open_url)
50
60
  end
51
61
 
52
62
  def get_images(path,h={})
@@ -112,5 +122,9 @@ module ImageDownloader
112
122
  alias all_image_places all_collect_from_methods
113
123
  end
114
124
 
125
+ def open_url
126
+ open(self.argument_url, 'User-Agent' => self.user_agent)
127
+ end
128
+
115
129
  end
116
130
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: image_downloader
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 2
10
- version: 0.2.2
9
+ - 3
10
+ version: 0.2.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Malykh Oleg
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-07-21 00:00:00 +04:00
18
+ date: 2011-07-25 00:00:00 +04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency