mosquito-scrape 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/mosquito/scrapers/tweet_scraper.rb +10 -7
- data/lib/mosquito/version.rb +1 -1
- data/lib/mosquito.rb +12 -9
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7c00016d7bb5c8cba182ee58439c0e4291abdf057bdfd82210426549c57a97ff
|
4
|
+
data.tar.gz: a1c4a7bc39cfc67760ef425f45d8ac5ed04f123086692b0ce0bd762f104baa18
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7818a6933e022965d43028fb4706be4694134088004aea6c57dc11040346ddfa022378b8e2a9da9c0e343985d63d5aa7222d0ce66747d10edd09defc180aac9
|
7
|
+
data.tar.gz: 0564c11d892c5111582c165f858441678168219c32ecc20d63b8eb67cb0e9d655beab95881186f456ea2ccf0ad46fab8d6a6a40cfa376c8a1b82db2733b6ad55
|
@@ -42,26 +42,29 @@ module Mosquito
|
|
42
42
|
video_preview_image = nil
|
43
43
|
video_file_type = nil
|
44
44
|
|
45
|
-
# Single image
|
46
|
-
|
45
|
+
# # Single image
|
46
|
+
# image_url = doc.xpath("//div[contains(@class, 'main-tweet')]/div/div/div/div/div/a[contains(@class, 'still-image')]/@href").first&.content
|
47
|
+
# images << Mosquito.retrieve_media("#{Capybara.app_host}#{image_url}") unless image_url.nil?
|
48
|
+
|
49
|
+
# debugger
|
47
50
|
|
48
51
|
# Slideshow
|
49
52
|
nodes = doc.xpath("//div[contains(@class, 'main-tweet')]/div/div/div[contains(@class, 'attachments')]/div[contains(@class, 'gallery-row')]/div/a/@href")
|
50
|
-
images.concat(nodes.map { |node| "#{Capybara.app_host}#{node.value}" })
|
53
|
+
images.concat(nodes.map { |node| Mosquito.retrieve_media("#{Capybara.app_host}#{node.value}") })
|
51
54
|
|
52
55
|
# Video
|
53
56
|
nodes = doc.xpath("//div[contains(@class, 'main-tweet')]/div/div/div[contains(@class, 'attachments')]/div[contains(@class, 'gallery-video')]/div/video")
|
54
57
|
unless nodes.empty?
|
55
|
-
video_preview_image = nodes.first["poster"]
|
56
|
-
videos.concat(nodes.map { |node|
|
58
|
+
video_preview_image = Mosquito.retrieve_media("#{Capybara.app_host}#{nodes.first["poster"]}", extension: ".jpg")
|
59
|
+
videos.concat(nodes.map { |node| Mosquito.retrieve_media(node.xpath("//source").first["src"]) })
|
57
60
|
video_file_type = "video" # This is always video now, since a gif isn't displayed differently
|
58
61
|
end
|
59
62
|
|
60
63
|
# GIF
|
61
64
|
nodes = doc.xpath("//div[contains(@class, 'main-tweet')]/div/div/div[contains(@class, 'attachments')]/div[contains(@class, 'gallery-gif')]/div/video")
|
62
65
|
unless nodes.empty?
|
63
|
-
video_preview_image = nodes.first["poster"]
|
64
|
-
videos.concat(nodes.map { |node| "#{Capybara.app_host}#{node.xpath("//source[1]/source/@src").first&.content}" })
|
66
|
+
video_preview_image = Mosquito.retrieve_media(nodes.first["poster"], extension: ".jpg")
|
67
|
+
videos.concat(nodes.map { |node| Mosquito.retrieve_media("#{Capybara.app_host}#{node.xpath("//source[1]/source/@src").first&.content}") })
|
65
68
|
video_file_type = "gif"
|
66
69
|
end
|
67
70
|
|
data/lib/mosquito/version.rb
CHANGED
data/lib/mosquito.rb
CHANGED
@@ -49,20 +49,23 @@ module Mosquito
|
|
49
49
|
|
50
50
|
# Get media from a URL and save to a temp folder set in the configuration under
|
51
51
|
# temp_storage_location
|
52
|
-
def self.retrieve_media(url)
|
52
|
+
def self.retrieve_media(url, extension: nil)
|
53
|
+
return "" if url.nil?
|
53
54
|
return "" if !Mosquito.save_media
|
54
55
|
|
55
56
|
response = Typhoeus.get(url)
|
56
57
|
|
57
58
|
# Get the file extension from the URL if one is present
|
58
|
-
extension
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
59
|
+
if extension.nil?
|
60
|
+
extension = url.split(".").last
|
61
|
+
|
62
|
+
# Do some basic checks so we just empty out if there's something weird in the file extension
|
63
|
+
# that could do some harm.
|
64
|
+
if extension.length.positive?
|
65
|
+
extension = extension[0...extension.index("?")]
|
66
|
+
extension = nil unless /^[a-zA-Z0-9]+$/.match?(extension)
|
67
|
+
extension = ".#{extension}" unless extension.nil?
|
68
|
+
end
|
66
69
|
end
|
67
70
|
|
68
71
|
temp_file_name = "#{Mosquito.temp_storage_location}/#{SecureRandom.uuid}#{extension}"
|