rypper 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rypper/cli.rb +5 -2
- data/lib/rypper/extractor.rb +30 -2
- metadata +2 -2
data/lib/rypper/cli.rb
CHANGED
@@ -43,10 +43,13 @@ module Rypper
|
|
43
43
|
puts " * #{html_uri} ..."
|
44
44
|
html = Rypper::Loader.get(html_uri)
|
45
45
|
if html.is_a?(String)
|
46
|
-
extractor.extract!(html).each do |image_uri|
|
46
|
+
extractor.extract!(html_uri, html).each do |image_uri|
|
47
47
|
if image_uri.is_a?(String)
|
48
|
-
print " * #{image_uri} ..."
|
49
48
|
image_path = uri.to_path(File.extname(image_uri))
|
49
|
+
if opts.has_key?(:output)
|
50
|
+
image_path = File.join(opts[:output], image_path)
|
51
|
+
end
|
52
|
+
print " * #{image_uri} --> #{image_path} ..."
|
50
53
|
if !File.exists?(image_path) || opts.has_key?(:overwrite)
|
51
54
|
Rypper::Loader.mkdir!(File.dirname(image_path))
|
52
55
|
image_file = File.open(image_path, 'w')
|
data/lib/rypper/extractor.rb
CHANGED
@@ -4,13 +4,27 @@ module Rypper
|
|
4
4
|
class Extractor
|
5
5
|
attr_reader :selector
|
6
6
|
|
7
|
+
# Returns the "directory" part of +uri+ as a String that ends in exactly
# one added trailing slash, suitable as a base for URI.join.
#
# If the URI has no file extension (per File.extname) it is treated as a
# directory itself; otherwise its last component is dropped via
# File.dirname.
#
# The result is always a new String. The previous implementation called
# chomp!/concat on the value returned by +to_s+, which for a String
# argument is the caller's own object, so the caller's string was modified
# (and frozen strings raised).
#
# NOTE(review): File.extname only inspects the last path component, so a
# bare host such as "http://example.com" is seen as having the extension
# ".com" -- confirm whether callers can pass such URIs.
#
# @param uri [#to_s] the page URI (String, URI, ...)
# @return [String] the directory URI with a trailing "/"
def self.dirname(uri)
  uri = uri.to_s
  base = File.extname(uri).empty? ? uri : File.dirname(uri)
  # Non-destructive chomp: strip at most one trailing slash, then re-add it.
  "#{base.chomp('/')}/"
end
|
19
|
+
|
7
20
|
# Creates an extractor for the given selector.
#
# The value is stored in @selector and exposed through the +selector+
# reader; extract! later passes it to the parsed document's +search+.
#
# @param selector the selector handed to +search+ (presumably a CSS or
#   XPath expression string -- confirm against callers)
def initialize(selector)
|
8
21
|
@selector = selector
|
9
22
|
end
|
10
23
|
|
11
|
-
def extract!(html)
|
24
|
+
def extract!(uri, html)
|
25
|
+
uri = uri.to_s
|
12
26
|
unless html.kind_of?(Nokogiri::HTML::Document)
|
13
|
-
html = Nokogiri::HTML(html)
|
27
|
+
html = Nokogiri::HTML(html.to_s)
|
14
28
|
end
|
15
29
|
res = []
|
16
30
|
elems = html.search(self.selector)
|
@@ -26,6 +40,20 @@ module Rypper
|
|
26
40
|
else
|
27
41
|
res = elems
|
28
42
|
end
|
43
|
+
res.map! do |elem|
|
44
|
+
if elem.is_a?(String)
|
45
|
+
elem_uri = ::URI.parse(elem)
|
46
|
+
if !elem_uri.absolute?
|
47
|
+
elem_uri = ::URI.join(self.dirname(uri), elem)
|
48
|
+
elsif elem_uri.instance_of?(::URI::Generic) # absolute path only
|
49
|
+
elem_uri = ::URI.parse(uri)
|
50
|
+
elem_uri.path = elem
|
51
|
+
end
|
52
|
+
elem_uri.to_s
|
53
|
+
else
|
54
|
+
elem
|
55
|
+
end
|
56
|
+
end
|
29
57
|
res
|
30
58
|
end
|
31
59
|
end
|