extractula 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/extractula/extractor.rb +15 -7
- data/lib/extractula.rb +1 -1
- metadata +1 -1
data/lib/extractula/extractor.rb
CHANGED
@@ -96,23 +96,31 @@ class Extractula::Extractor
|
|
96
96
|
|
97
97
|
def image_urls
|
98
98
|
if image_urls_path
|
99
|
-
html.search(image_urls_path)
|
100
|
-
src = img['src'].strip
|
101
|
-
src = "#{@url.scheme}://#{@url.host}#{src}" if src.start_with?('/')
|
102
|
-
src
|
103
|
-
end
|
99
|
+
image_srcs_from html.search(image_urls_path)
|
104
100
|
end
|
105
101
|
end
|
106
102
|
|
107
103
|
def video_embed
|
108
104
|
if video_embed_path
|
109
|
-
html.search(video_embed_path)
|
105
|
+
embed_code_from html.search(video_embed_path)
|
110
106
|
end
|
111
107
|
end
|
112
108
|
|
113
109
|
private
|
114
110
|
|
115
|
-
def
|
111
|
+
def image_srcs_from nodeset
|
112
|
+
nodeset.collect { |img| unrelativize img['src'].strip }
|
113
|
+
end
|
114
|
+
|
115
|
+
def embed_code_from nodeset
|
116
|
+
nodeset.collect { |embed| embed.to_html }.first
|
117
|
+
end
|
118
|
+
|
119
|
+
def unrelativize path
|
120
|
+
path.start_with?('/') ? "#{@url.scheme}://#{@url.host}#{path}" : path
|
121
|
+
end
|
122
|
+
|
123
|
+
def content_at path, attrib = :text, block = nil
|
116
124
|
if path
|
117
125
|
if node = html.at(path)
|
118
126
|
value = attrib == :text ? node.text.strip : node[attrib].strip
|
data/lib/extractula.rb
CHANGED