image_downloader 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +4 -4
- data/lib/image_downloader/parser.rb +19 -5
- metadata +4 -4
data/README.rdoc
CHANGED
@@ -32,15 +32,15 @@ After installation, you can use the following code as an example:
|
|
32
32
|
# download all images found anywhere on the page (by regexp: everything that looks like an image URL)
|
33
33
|
downloader.parse(:any_looks_like_image => true)
|
34
34
|
|
35
|
-
#####
|
35
|
+
##### or
|
36
36
|
# download images from all elements where images are usually placed (<img...>, <a...>, ...)
|
37
37
|
downloader.parse()
|
38
38
|
|
39
|
-
#####
|
39
|
+
##### or
|
40
40
|
# download images from exact places on the page
|
41
41
|
downloader.parse(:collect => {:link_icon => true})
|
42
42
|
|
43
|
-
#####
|
43
|
+
##### or
|
44
44
|
# download images by regexp
|
45
45
|
downloader.parse(:regexp => /[^'"]+\.jpg/i)
|
46
46
|
|
@@ -58,7 +58,7 @@ For "parse" method available following options
|
|
58
58
|
}
|
59
59
|
|
60
60
|
# find by regexp
|
61
|
-
:regexp => /['"]([^'"]+\.jpg)[^'"]*['"]/i)
|
61
|
+
:regexp => /['"]([^'"]+\.jpg)[^'"]*['"]/i) # for Ruby 1.8 only (in Ruby 1.9, capture groups () cannot be used with the scan method)
|
62
62
|
:regexp => /[^'"]+\.jpg/i # the same, but shorter
|
63
63
|
:regexp => /[^'"]+\.css/ # other files can also be downloaded
|
64
64
|
|
@@ -30,23 +30,33 @@ module ImageDownloader
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def get_content_raw
|
33
|
-
@content =
|
33
|
+
@content = self.open_url.read
|
34
34
|
@content.gsub!(/[\n\r\t]+/,' ')
|
35
35
|
end
|
36
36
|
|
37
37
|
def get_images_raw(path,h={})
|
38
|
-
self.content.scan(/['"]+[^'"]+\.(?:#{Images::IMAGE_EXTENSIONS.join('|')})['"]+/)
|
39
|
-
|
40
|
-
|
38
|
+
self.content.scan(/['"]+([^'"]+\.(?:#{Images::IMAGE_EXTENSIONS.join('|')}))[^'"]*['"]+/i) {|src|
|
39
|
+
self.push_to_images(path,Parser.clear(src[0]))
|
40
|
+
}
|
41
|
+
self.content.scan(/(?:href|src)=([^\s'">]+\.(?:#{Images::IMAGE_EXTENSIONS.join('|')}))[^\s'">]*[>\s]+/i) {|src|
|
42
|
+
self.push_to_images(path,Parser.clear(src[0]))
|
41
43
|
}
|
42
44
|
end
|
43
45
|
|
46
|
+
def self.clear(str)
|
47
|
+
if str =~ /url/i
|
48
|
+
str.gsub!(/^.*?url\(/,'')
|
49
|
+
str.gsub!(/\)/,'')
|
50
|
+
end
|
51
|
+
str
|
52
|
+
end
|
53
|
+
|
44
54
|
def get_images_regexp(path,regexp)
|
45
55
|
self.content.scan(regexp) {|src| self.push_to_images(path,src.to_s)}
|
46
56
|
end
|
47
57
|
|
48
58
|
def get_content
|
49
|
-
@content = Nokogiri::HTML(
|
59
|
+
@content = Nokogiri::HTML(self.open_url)
|
50
60
|
end
|
51
61
|
|
52
62
|
def get_images(path,h={})
|
@@ -112,5 +122,9 @@ module ImageDownloader
|
|
112
122
|
alias all_image_places all_collect_from_methods
|
113
123
|
end
|
114
124
|
|
125
|
+
def open_url
|
126
|
+
open(self.argument_url, 'User-Agent' => self.user_agent)
|
127
|
+
end
|
128
|
+
|
115
129
|
end
|
116
130
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: image_downloader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 2
|
10
|
-
version: 0.2.2
|
9
|
+
- 3
|
10
|
+
version: 0.2.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Malykh Oleg
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-07-
|
18
|
+
date: 2011-07-25 00:00:00 +04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|