extractula 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/extractula/extractor.rb +15 -7
- data/lib/extractula.rb +1 -1
- metadata +1 -1
data/lib/extractula/extractor.rb
CHANGED
@@ -96,23 +96,31 @@ class Extractula::Extractor
|
|
96
96
|
|
97
97
|
def image_urls
|
98
98
|
if image_urls_path
|
99
|
-
html.search(image_urls_path).collect do |img|
|
100
|
-
src = img['src'].strip
|
101
|
-
src = "#{@url.scheme}://#{@url.host}#{src}" if src.start_with?('/')
|
102
|
-
src
|
103
|
-
end
|
99
|
+
image_srcs_from html.search(image_urls_path)
|
104
100
|
end
|
105
101
|
end
|
106
102
|
|
107
103
|
def video_embed
|
108
104
|
if video_embed_path
|
109
|
-
html.search(video_embed_path)
|
105
|
+
embed_code_from html.search(video_embed_path)
|
110
106
|
end
|
111
107
|
end
|
112
108
|
|
113
109
|
private
|
114
110
|
|
115
|
-
def
|
111
|
+
def image_srcs_from nodeset
|
112
|
+
nodeset.collect { |img| unrelativize img['src'].strip }
|
113
|
+
end
|
114
|
+
|
115
|
+
def embed_code_from nodeset
|
116
|
+
nodeset.collect { |embed| embed.to_html }.first
|
117
|
+
end
|
118
|
+
|
119
|
+
def unrelativize path
|
120
|
+
path.start_with?('/') ? "#{@url.scheme}://#{@url.host}#{path}" : path
|
121
|
+
end
|
122
|
+
|
123
|
+
def content_at path, attrib = :text, block = nil
|
116
124
|
if path
|
117
125
|
if node = html.at(path)
|
118
126
|
value = attrib == :text ? node.text.strip : node[attrib].strip
|
data/lib/extractula.rb
CHANGED