forki 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/main.yml +18 -0
- data/.gitignore +17 -0
- data/.rubocop.yml +71 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +21 -0
- data/Gemfile.lock +163 -0
- data/LICENSE.txt +21 -0
- data/README.md +87 -0
- data/Rakefile +16 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/forki.gemspec +42 -0
- data/lib/forki/post.rb +61 -0
- data/lib/forki/scrapers/post_scraper.rb +360 -0
- data/lib/forki/scrapers/scraper.rb +189 -0
- data/lib/forki/scrapers/user_scraper.rb +94 -0
- data/lib/forki/user.rb +45 -0
- data/lib/forki/version.rb +5 -0
- data/lib/forki.rb +98 -0
- data/lib/generators/forki.rb +3 -0
- data/lib/generators/forki_generator.rb +6 -0
- data/lib/helpers/configuration.rb +28 -0
- data/reactions/.DS_Store +0 -0
- data/reactions/angry.png +0 -0
- data/reactions/care.png +0 -0
- data/reactions/haha.png +0 -0
- data/reactions/like.png +0 -0
- data/reactions/love.png +0 -0
- data/reactions/pride.png +0 -0
- data/reactions/sad.png +0 -0
- data/reactions/wow.png +0 -0
- metadata +146 -0
data/lib/forki/user.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Forki
|
4
|
+
# Plain read-only record describing one scraped user.
#
# `User.lookup` turns each URL into a hash via Forki::UserScraper#parse and wraps that hash
# in a User. Each attribute is read from the hash under the symbol of the same name.
class User
  attr_reader :name, :id, :number_of_followers, :verified, :profile, :profile_link,
              :profile_image_file, :profile_image_url, :number_of_likes

  class << self
    # Scrape one or more URLs.
    #
    # @param urls [Object, Array] a list of URLs; any non-Array argument is wrapped in a one-element list
    # @return [Array<User>] one User per URL, in the order given
    def lookup(urls = [])
      url_list = urls.is_a?(Array) ? urls : [urls]
      scrape(url_list)
    end

    private

    # Runs a fresh Forki::UserScraper for every URL and builds a User from each parsed hash.
    def scrape(urls)
      urls.map { |url| User.new(Forki::UserScraper.new.parse(url)) }
    end
  end

  private

  # @param user_hash [Hash] attribute values keyed by symbol
  def initialize(user_hash = {})
    @name, @id, @number_of_followers, @verified, @profile,
      @profile_link, @profile_image_file, @profile_image_url, @number_of_likes =
      user_hash.values_at(
        :name, :id, :number_of_followers, :verified, :profile,
        :profile_link, :profile_image_file, :profile_image_url, :number_of_likes
      )
  end
end
|
45
|
+
end
|
data/lib/forki.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "forki/version"
|
4
|
+
|
5
|
+
# Representative objects we create
|
6
|
+
require_relative "forki/user"
|
7
|
+
require_relative "forki/post"
|
8
|
+
|
9
|
+
require "helpers/configuration"
|
10
|
+
require_relative "forki/scrapers/scraper"
|
11
|
+
|
12
|
+
module Forki
|
13
|
+
extend Configuration
|
14
|
+
|
15
|
+
@@forki_logger = Logger.new(STDOUT)
|
16
|
+
|
17
|
+
# Generic Forki error class; a direct subclass of StandardError.
class Error < StandardError
end

# Specialisation of Error.
# NOTE(review): the name suggests the failed operation may be retried — confirm against the code that raises it.
class RetryableError < Error
end
|
19
|
+
|
20
|
+
# Error whose message defaults to "Url must be a proper Facebook Url".
class InvalidUrlError < StandardError
  # @param msg [String] message to report; the default is used when omitted
  def initialize(msg = "Url must be a proper Facebook Url")
    super(msg)
  end
end
|
25
|
+
|
26
|
+
# Error whose message defaults to "Post no longer available".
class ContentUnavailableError < StandardError
  # @param msg [String] message to report; the default is used when omitted
  def initialize(msg = "Post no longer available")
    super(msg)
  end
end
|
31
|
+
|
32
|
+
# Error whose message defaults to a hint about the FACEBOOK_EMAIL / FACEBOOK_PASSWORD
# environment variables.
class MissingCredentialsError < StandardError
  # The constructor must be spelled `initialize`; under the former spelling `initalize`
  # Ruby never invoked it, so the default message below was silently ignored and
  # `MissingCredentialsError.new.message` fell back to the class name.
  #
  # @param msg [String] message to report; the default is used when omitted
  def initialize(msg = "Missing FACEBOOK_EMAIL or FACEBOOK_PASSWORD environment variable")
    super
  end
end
|
37
|
+
|
38
|
+
# Error whose message defaults to "Credentials are most likely being blocked".
class BlockedCredentialsError < StandardError
  # The constructor must be spelled `initialize`; under the former spelling `initalize`
  # Ruby never invoked it, so the default message below was silently ignored and
  # `BlockedCredentialsError.new.message` fell back to the class name.
  #
  # @param msg [String] message to report; the default is used when omitted
  def initialize(msg = "Credentials are most likely being blocked")
    super
  end
end
|
43
|
+
|
44
|
+
# Error whose message defaults to "Forki does not know how to handle the post".
class UnhandledContentError < StandardError
  # @param msg [String] message to report; the default is used when omitted
  def initialize(msg = "Forki does not know how to handle the post")
    super(msg)
  end
end
|
49
|
+
|
50
|
+
define_setting :temp_storage_location, "tmp/forki"
|
51
|
+
|
52
|
+
# Extract the file extension from a media URL
# E.g. ".png" from https://scontent-atl3-2.xx.fbcdn.net/v/t39.30808-1.png?stp=dst-png_p148x148
#
# Only the text after the last "." of the URL (query string removed) is considered, and it
# must be exactly three alphanumeric characters; anything else (e.g. "jpeg") yields nil.
#
# @param url [String, nil] the media URL
# @return [String, nil] the extension including the leading dot, or nil when none is recognised
def self.extract_file_extension_from_url(url)
  return nil if url.nil?

  stripped_url = url.split("?").first # remove URL query params
  return nil if stripped_url.nil? # an empty URL splits into no parts at all

  candidate = stripped_url.split(".").last

  # \A and \z anchor the whole string. The line anchors ^ and $ would accept a value such as
  # "png\nanything" because its first line alone satisfies the pattern.
  return nil unless /\A[a-zA-Z0-9]{3}\z/.match?(candidate)

  ".#{candidate}"
end
|
62
|
+
|
63
|
+
# Get an image from a URL and save to a temp folder set in the configuration under
# temp_storage_location
#
# The file is named "facebook_media_<uuid><extension>", where the extension comes from
# Forki.extract_file_extension_from_url and may be empty.
#
# NOTE(review): the HTTP response status is not inspected; whatever body comes back is written.
#
# @param url [String] the media URL to download
# @return [String] path of the file that was written
def self.retrieve_media(url)
  @@forki_logger.debug("Forki download started: #{url}")
  # Monotonic clock: unaffected by wall-clock adjustments while the download runs
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  response = Typhoeus.get(url)

  extension = Forki.extract_file_extension_from_url(url)
  temp_file = "#{Forki.temp_storage_location}/facebook_media_#{SecureRandom.uuid}#{extension}"

  # The temp folder may not exist yet, so create it on demand
  create_temp_storage_location
  File.binwrite(temp_file, response.body)

  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

  @@forki_logger.debug("Forki download finished")
  @@forki_logger.debug("Save Location: #{temp_file}")
  # Measure the path exactly as written: prefixing "./" pointed at a different (missing)
  # file whenever temp_storage_location was an absolute path
  @@forki_logger.debug("Size: #{(File.size(temp_file).to_f / 1024 / 1024).round(4)} MB")
  @@forki_logger.debug("Time to Download: #{elapsed.round(3)} seconds")
  temp_file
end
|
84
|
+
|
85
|
+
# Ensures the directory named by Forki.temp_storage_location exists, creating it
# (including missing parents) when it is not already a directory.
#
# @return [nil, Object] nil when the directory already exists, otherwise the result of FileUtils.mkdir_p
def self.create_temp_storage_location
  storage_dir = Forki.temp_storage_location
  FileUtils.mkdir_p(storage_dir) unless File.directory?(storage_dir)
end
|
90
|
+
|
91
|
+
# Sets the level of the shared logger from the RAILS_ENV environment variable:
# Logger::INFO when it is "test" or "development", Logger::WARN for anything else
# (including when it is unset).
def self.set_logger_level
  verbose = %w[test development].include?(ENV["RAILS_ENV"])
  @@forki_logger.level = verbose ? Logger::INFO : Logger::WARN
end
|
98
|
+
end
|
data/lib/helpers/configuration.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Borrowed with thanks from https://www.viget.com/articles/easy-gem-configuration-variables-with-defaults/
|
4
|
+
# Mixin providing named settings with default values.
#
# Meant to be brought in with `extend`: every setting is stored in a class variable on the
# extending module/class and exposed through a reader and a writer defined on that same object.
module Configuration
  # Yields the extending object so that settings can be assigned inside a block.
  def configuration
    yield self
  end

  # Declares a setting.
  #
  # @param name [String, Symbol] setting name; becomes `name` and `name=` on the extending object
  # @param default [Object] initial value
  def define_setting(name, default = nil)
    storage_key = "@@#{name}"
    class_variable_set(storage_key, default)

    define_class_method("#{name}=") { |value| class_variable_set(storage_key, value) }
    define_class_method(name) { class_variable_get(storage_key) }
  end

  private

  # Defines +block+ as a method named +name+ on the extending object itself.
  def define_class_method(name, &block)
    define_singleton_method(name, &block)
  end
end
|
data/reactions/.DS_Store
ADDED
Binary file
|
data/reactions/angry.png
ADDED
Binary file
|
data/reactions/care.png
ADDED
Binary file
|
data/reactions/haha.png
ADDED
Binary file
|
data/reactions/like.png
ADDED
Binary file
|
data/reactions/love.png
ADDED
Binary file
|
data/reactions/pride.png
ADDED
Binary file
|
data/reactions/sad.png
ADDED
Binary file
|
data/reactions/wow.png
ADDED
Binary file
|
metadata
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: forki
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ''
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-05-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: capybara
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: apparition
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: typhoeus
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: oj
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: selenium-webdriver
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description:
|
84
|
+
email:
|
85
|
+
- ''
|
86
|
+
executables: []
|
87
|
+
extensions: []
|
88
|
+
extra_rdoc_files: []
|
89
|
+
files:
|
90
|
+
- ".github/workflows/main.yml"
|
91
|
+
- ".gitignore"
|
92
|
+
- ".rubocop.yml"
|
93
|
+
- ".ruby-version"
|
94
|
+
- CHANGELOG.md
|
95
|
+
- CODE_OF_CONDUCT.md
|
96
|
+
- Gemfile
|
97
|
+
- Gemfile.lock
|
98
|
+
- LICENSE.txt
|
99
|
+
- README.md
|
100
|
+
- Rakefile
|
101
|
+
- bin/console
|
102
|
+
- bin/setup
|
103
|
+
- forki.gemspec
|
104
|
+
- lib/forki.rb
|
105
|
+
- lib/forki/post.rb
|
106
|
+
- lib/forki/scrapers/post_scraper.rb
|
107
|
+
- lib/forki/scrapers/scraper.rb
|
108
|
+
- lib/forki/scrapers/user_scraper.rb
|
109
|
+
- lib/forki/user.rb
|
110
|
+
- lib/forki/version.rb
|
111
|
+
- lib/generators/forki.rb
|
112
|
+
- lib/generators/forki_generator.rb
|
113
|
+
- lib/helpers/configuration.rb
|
114
|
+
- reactions/.DS_Store
|
115
|
+
- reactions/angry.png
|
116
|
+
- reactions/care.png
|
117
|
+
- reactions/haha.png
|
118
|
+
- reactions/like.png
|
119
|
+
- reactions/love.png
|
120
|
+
- reactions/pride.png
|
121
|
+
- reactions/sad.png
|
122
|
+
- reactions/wow.png
|
123
|
+
homepage:
|
124
|
+
licenses:
|
125
|
+
- MIT
|
126
|
+
metadata: {}
|
127
|
+
post_install_message:
|
128
|
+
rdoc_options: []
|
129
|
+
require_paths:
|
130
|
+
- lib
|
131
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: 2.7.0
|
136
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
|
+
requirements:
|
138
|
+
- - ">="
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
requirements: []
|
142
|
+
rubygems_version: 3.3.26
|
143
|
+
signing_key:
|
144
|
+
specification_version: 4
|
145
|
+
summary: A gem to scrape Facebook pages for archive purposes.
|
146
|
+
test_files: []
|