forki 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/main.yml +18 -0
- data/.gitignore +17 -0
- data/.rubocop.yml +71 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +21 -0
- data/Gemfile.lock +163 -0
- data/LICENSE.txt +21 -0
- data/README.md +87 -0
- data/Rakefile +16 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/forki.gemspec +42 -0
- data/lib/forki/post.rb +61 -0
- data/lib/forki/scrapers/post_scraper.rb +360 -0
- data/lib/forki/scrapers/scraper.rb +189 -0
- data/lib/forki/scrapers/user_scraper.rb +94 -0
- data/lib/forki/user.rb +45 -0
- data/lib/forki/version.rb +5 -0
- data/lib/forki.rb +98 -0
- data/lib/generators/forki.rb +3 -0
- data/lib/generators/forki_generator.rb +6 -0
- data/lib/helpers/configuration.rb +28 -0
- data/reactions/.DS_Store +0 -0
- data/reactions/angry.png +0 -0
- data/reactions/care.png +0 -0
- data/reactions/haha.png +0 -0
- data/reactions/like.png +0 -0
- data/reactions/love.png +0 -0
- data/reactions/pride.png +0 -0
- data/reactions/sad.png +0 -0
- data/reactions/wow.png +0 -0
- metadata +146 -0
data/lib/forki/user.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Forki
  # Read-only representation of a user, built from the hash that
  # Forki::UserScraper#parse returns for a URL.
  class User
    attr_reader :name,
                :id,
                :number_of_followers,
                :verified,
                :profile,
                :profile_link,
                :profile_image_file,
                :profile_image_url,
                :number_of_likes

    # Scrape one or more users.
    #
    # @param urls [Object, Array] a single value or an Array of values (presumably
    #   URL strings — each is handed unchanged to Forki::UserScraper#parse). Anything
    #   that is not an Array is wrapped in a one-element Array.
    # @return [Array<Forki::User>] one User per input, in input order
    def self.lookup(urls = [])
      urls = [urls] unless urls.is_a?(Array)
      # Implicit receiver: `scrape` is a private singleton method, and calling it as
      # `self.scrape` is only permitted from Ruby 2.7 onwards.
      scrape(urls)
    end

    # `initialize` is always private in Ruby, so no access modifier is needed here.
    #
    # @param user_hash [Hash] symbol-keyed attributes; missing keys leave the
    #   corresponding reader returning nil
    def initialize(user_hash = {})
      @name = user_hash[:name]
      @id = user_hash[:id]
      @number_of_followers = user_hash[:number_of_followers]
      @verified = user_hash[:verified]
      @profile = user_hash[:profile]
      @profile_link = user_hash[:profile_link]
      @profile_image_file = user_hash[:profile_image_file]
      @profile_image_url = user_hash[:profile_image_url]
      @number_of_likes = user_hash[:number_of_likes]
    end

    class << self
      private

      # Runs a fresh Forki::UserScraper for every entry and wraps each result in a User.
      #
      # @param urls [Array] values passed one by one to Forki::UserScraper#parse
      # @return [Array<Forki::User>]
      def scrape(urls)
        urls.map do |url|
          user_hash = Forki::UserScraper.new.parse(url)
          User.new(user_hash)
        end
      end
    end
  end
end
|
data/lib/forki.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "forki/version"
|
4
|
+
|
5
|
+
# Representative objects we create
|
6
|
+
require_relative "forki/user"
|
7
|
+
require_relative "forki/post"
|
8
|
+
|
9
|
+
require "helpers/configuration"
|
10
|
+
require_relative "forki/scrapers/scraper"
|
11
|
+
|
12
|
+
# Top-level namespace of the gem: error classes, the `temp_storage_location`
# setting and helpers for downloading media into that location.
module Forki
  extend Configuration

  # Logger shared by the module-level helpers below; writes to standard output.
  @@forki_logger = Logger.new(STDOUT)

  # Base error class of the gem.
  class Error < StandardError; end
  # Subclass of Error; presumably raised for failures worth retrying — confirm against callers.
  class RetryableError < Error; end

  # Raised with a default message about the URL not being a proper Facebook URL.
  class InvalidUrlError < StandardError
    def initialize(msg = "Url must be a proper Facebook Url")
      super
    end
  end

  # Raised with a default message saying the post is no longer available.
  class ContentUnavailableError < StandardError
    def initialize(msg = "Post no longer available")
      super
    end
  end

  # Raised with a default message naming the missing credential environment variables.
  class MissingCredentialsError < StandardError
    # Previously misspelled `initalize`, so the default message was never applied
    # and `#message` fell back to the class name.
    def initialize(msg = "Missing FACEBOOK_EMAIL or FACEBOOK_PASSWORD environment variable")
      super
    end
  end

  # Raised with a default message saying the credentials are probably blocked.
  class BlockedCredentialsError < StandardError
    # Previously misspelled `initalize`, so the default message was never applied
    # and `#message` fell back to the class name.
    def initialize(msg = "Credentials are most likely being blocked")
      super
    end
  end

  # Raised with a default message saying Forki cannot handle the post.
  class UnhandledContentError < StandardError
    def initialize(msg = "Forki does not know how to handle the post")
      super
    end
  end

  define_setting :temp_storage_location, "tmp/forki"

  # Extract the file extension from a media URL
  # E.g. ".png" from https://scontent-atl3-2.xx.fbcdn.net/v/t39.30808-1.png?stp=dst-png_p148x148
  #
  # @param url [String, nil] the media URL
  # @return [String, nil] the extension including the leading dot, or nil when the
  #   text after the last "." (query string removed) is not exactly three
  #   alphanumeric characters, or when the URL is nil or empty
  def self.extract_file_extension_from_url(url)
    stripped_url = url.to_s.split("?").first # remove URL query params
    return nil if stripped_url.nil? # "".split("?") is empty, so there is nothing to inspect

    extension = stripped_url.split(".").last

    # extension must be a 3-character alphanumeric string; \A and \z anchor the whole
    # string, whereas ^ and $ would also accept a value with a trailing newline
    return nil unless /\A[a-zA-Z0-9]{3}\z/.match?(extension)

    ".#{extension}"
  end

  # Get an image from a URL and save to a temp folder set in the configuration under
  # temp_storage_location
  #
  # NOTE(review): the response status is not checked, so whatever body comes back is
  # written to disk — confirm this best-effort behaviour is intended.
  #
  # @param url [String] the media URL to download
  # @return [String] path of the written file
  def self.retrieve_media(url)
    @@forki_logger.debug("Forki download started: #{url}")
    # Monotonic clock so the measured duration is unaffected by wall-clock adjustments
    start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    response = Typhoeus.get(url)

    extension = Forki.extract_file_extension_from_url(url)
    temp_file = "#{Forki.temp_storage_location}/facebook_media_#{SecureRandom.uuid}#{extension}"

    # We do this in case the folder isn't created yet, since it's a temp folder we'll just do so
    create_temp_storage_location
    File.binwrite(temp_file, response.body)

    @@forki_logger.debug("Forki download finished")
    @@forki_logger.debug("Save Location: #{temp_file}")
    # Measure the file at the path that was written; prefixing "./" pointed at the
    # wrong file whenever temp_storage_location was an absolute path
    @@forki_logger.debug("Size: #{(File.size(temp_file).to_f / 1024 / 1024).round(4)} MB")
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
    @@forki_logger.debug("Time to Download: #{elapsed.round(3)} seconds")
    temp_file
  end

  # Create the configured temp_storage_location directory (including parents)
  # unless it is already a directory.
  def self.create_temp_storage_location
    # File.directory? is false for a missing path, so a separate existence check is unnecessary
    return if File.directory?(Forki.temp_storage_location)

    FileUtils.mkdir_p Forki.temp_storage_location
  end

  # Set the logger threshold from RAILS_ENV: INFO for "test" and "development",
  # WARN for anything else (including when the variable is unset).
  def self.set_logger_level
    if ENV["RAILS_ENV"] == "test" || ENV["RAILS_ENV"] == "development"
      @@forki_logger.level = Logger::INFO
    else
      @@forki_logger.level = Logger::WARN
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Borrowed with thanks from https://www.viget.com/articles/easy-gem-configuration-variables-with-defaults/
|
4
|
+
# Mixin for modules or classes that want named settings with default values.
# Extend it, then declare settings with `define_setting`; each setting is stored
# in a class variable on the extending object and exposed through a reader and
# a writer defined on that object.
module Configuration
  # Yields the receiver so settings can be assigned inside a block.
  #
  # @yieldparam config [Object] the object that extended this module
  # @return the value of the block
  def configuration
    yield self
  end

  # Declare a setting and give it its initial value.
  #
  # @param name [Symbol, String] setting name; becomes `name` and `name=` on the receiver
  # @param default [Object, nil] initial value
  def define_setting(name, default = nil)
    class_variable_set("@@#{name}", default)

    define_class_method("#{name}=") { |value| class_variable_set("@@#{name}", value) }
    define_class_method(name) { class_variable_get("@@#{name}") }
  end

  private

  # Define a singleton ("class-level") method on the receiver whose body is +block+.
  def define_class_method(name, &block)
    # Standard-library equivalent of opening the singleton class and calling define_method there
    define_singleton_method(name, &block)
  end
end
|
data/reactions/.DS_Store
ADDED
Binary file
|
data/reactions/angry.png
ADDED
Binary file
|
data/reactions/care.png
ADDED
Binary file
|
data/reactions/haha.png
ADDED
Binary file
|
data/reactions/like.png
ADDED
Binary file
|
data/reactions/love.png
ADDED
Binary file
|
data/reactions/pride.png
ADDED
Binary file
|
data/reactions/sad.png
ADDED
Binary file
|
data/reactions/wow.png
ADDED
Binary file
|
metadata
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: forki
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ''
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-05-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: capybara
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: apparition
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: typhoeus
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: oj
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: selenium-webdriver
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description:
|
84
|
+
email:
|
85
|
+
- ''
|
86
|
+
executables: []
|
87
|
+
extensions: []
|
88
|
+
extra_rdoc_files: []
|
89
|
+
files:
|
90
|
+
- ".github/workflows/main.yml"
|
91
|
+
- ".gitignore"
|
92
|
+
- ".rubocop.yml"
|
93
|
+
- ".ruby-version"
|
94
|
+
- CHANGELOG.md
|
95
|
+
- CODE_OF_CONDUCT.md
|
96
|
+
- Gemfile
|
97
|
+
- Gemfile.lock
|
98
|
+
- LICENSE.txt
|
99
|
+
- README.md
|
100
|
+
- Rakefile
|
101
|
+
- bin/console
|
102
|
+
- bin/setup
|
103
|
+
- forki.gemspec
|
104
|
+
- lib/forki.rb
|
105
|
+
- lib/forki/post.rb
|
106
|
+
- lib/forki/scrapers/post_scraper.rb
|
107
|
+
- lib/forki/scrapers/scraper.rb
|
108
|
+
- lib/forki/scrapers/user_scraper.rb
|
109
|
+
- lib/forki/user.rb
|
110
|
+
- lib/forki/version.rb
|
111
|
+
- lib/generators/forki.rb
|
112
|
+
- lib/generators/forki_generator.rb
|
113
|
+
- lib/helpers/configuration.rb
|
114
|
+
- reactions/.DS_Store
|
115
|
+
- reactions/angry.png
|
116
|
+
- reactions/care.png
|
117
|
+
- reactions/haha.png
|
118
|
+
- reactions/like.png
|
119
|
+
- reactions/love.png
|
120
|
+
- reactions/pride.png
|
121
|
+
- reactions/sad.png
|
122
|
+
- reactions/wow.png
|
123
|
+
homepage:
|
124
|
+
licenses:
|
125
|
+
- MIT
|
126
|
+
metadata: {}
|
127
|
+
post_install_message:
|
128
|
+
rdoc_options: []
|
129
|
+
require_paths:
|
130
|
+
- lib
|
131
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: 2.7.0
|
136
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
|
+
requirements:
|
138
|
+
- - ">="
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
requirements: []
|
142
|
+
rubygems_version: 3.3.26
|
143
|
+
signing_key:
|
144
|
+
specification_version: 4
|
145
|
+
summary: A gem to scrape Facebook pages for archive purposes.
|
146
|
+
test_files: []
|