forki 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -6
- data/bin/generate_sieve +79 -0
- data/bin/generator_templates/image_sieve_template.rb.erb +63 -0
- data/bin/generator_templates/image_sieve_test_template.rb.erb +42 -0
- data/bin/generator_templates/video_sieve_template.rb.erb +61 -0
- data/bin/generator_templates/video_sieve_test_template.rb.erb +41 -0
- data/forki.gemspec +2 -0
- data/lib/forki/scrapers/post_scraper.rb +25 -7
- data/lib/forki/scrapers/scraper.rb +69 -20
- data/lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb +25 -0
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve.rb +24 -0
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb +66 -0
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb +91 -0
- data/lib/forki/scrapers/user_scraper.rb +20 -5
- data/lib/forki/version.rb +1 -1
- metadata +26 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a1b2f6a831ebac1bf9e79cc33818aa3d8f638459f538ff4f0cb92bab55b16df
|
4
|
+
data.tar.gz: 1df3b090db0ba37ecfbcf17933d41334620e9542a75f3328d9a335adec5d1ae9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b65c46157a6d1f320345d0216d138239c9d73cb29fd7fc0fa504655e0c789021a558d2e75a0447f026dc694e4fb95765cf12f8b2350e147b1011e670cd40621f
|
7
|
+
data.tar.gz: bc62a4decd0205e75ac93038d5768ff4d9ec55d8a25980dbb8522b3b2296f84e50db7caa3ba8aeb28d61eb482957b4d4bf68a9d9658cc26f4209198a64b1fe04
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
forki (0.1.
|
4
|
+
forki (0.1.4)
|
5
5
|
apparition
|
6
6
|
capybara
|
7
7
|
oj
|
@@ -37,7 +37,7 @@ GEM
|
|
37
37
|
ast (2.4.2)
|
38
38
|
builder (3.2.4)
|
39
39
|
byebug (11.1.3)
|
40
|
-
capybara (3.39.
|
40
|
+
capybara (3.39.2)
|
41
41
|
addressable
|
42
42
|
matrix
|
43
43
|
mini_mime (>= 0.1.3)
|
@@ -66,11 +66,11 @@ GEM
|
|
66
66
|
minitest (5.18.0)
|
67
67
|
nokogiri (1.15.1-arm64-darwin)
|
68
68
|
racc (~> 1.4)
|
69
|
-
oj (3.
|
69
|
+
oj (3.15.1)
|
70
70
|
parallel (1.23.0)
|
71
71
|
parser (3.2.2.1)
|
72
72
|
ast (~> 2.4.1)
|
73
|
-
public_suffix (5.0.
|
73
|
+
public_suffix (5.0.3)
|
74
74
|
racc (1.6.2)
|
75
75
|
rack (2.2.4)
|
76
76
|
rack-test (2.1.0)
|
@@ -127,7 +127,7 @@ GEM
|
|
127
127
|
rubocop-rails (~> 2.0)
|
128
128
|
ruby-progressbar (1.13.0)
|
129
129
|
rubyzip (2.3.2)
|
130
|
-
selenium-webdriver (4.
|
130
|
+
selenium-webdriver (4.10.0)
|
131
131
|
rexml (~> 3.2, >= 3.2.5)
|
132
132
|
rubyzip (>= 1.2.2, < 3.0)
|
133
133
|
websocket (~> 1.0)
|
@@ -138,7 +138,7 @@ GEM
|
|
138
138
|
concurrent-ruby (~> 1.0)
|
139
139
|
unicode-display_width (2.4.2)
|
140
140
|
websocket (1.2.9)
|
141
|
-
websocket-driver (0.7.
|
141
|
+
websocket-driver (0.7.6)
|
142
142
|
websocket-extensions (>= 0.1.0)
|
143
143
|
websocket-extensions (0.1.5)
|
144
144
|
xpath (3.2.0)
|
@@ -160,6 +160,7 @@ DEPENDENCIES
|
|
160
160
|
rubocop (~> 1.7)
|
161
161
|
rubocop-rails (~> 2.17.3)
|
162
162
|
rubocop-rails_config
|
163
|
+
thor
|
163
164
|
|
164
165
|
BUNDLED WITH
|
165
166
|
2.3.11
|
data/bin/generate_sieve
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "forki"
|
6
|
+
require "thor"
|
7
|
+
require "erb"
|
8
|
+
|
9
|
+
class GenerateSieve < Thor
|
10
|
+
|
11
|
+
desc "generate TYPE NAME", "generate a TYPE named NAME"
|
12
|
+
option :video
|
13
|
+
option :image
|
14
|
+
def generate(type, name)
|
15
|
+
case type
|
16
|
+
when "post"
|
17
|
+
if !options[:video].nil?
|
18
|
+
style = :video
|
19
|
+
elsif !options[:image].nil?
|
20
|
+
style = :image
|
21
|
+
else
|
22
|
+
puts "Must indicate either video or image flag"
|
23
|
+
exit
|
24
|
+
end
|
25
|
+
|
26
|
+
SieveGenerator.generate_post_sieve(name, style)
|
27
|
+
when "user"
|
28
|
+
SieveGenerator.generate_user_sieve(name)
|
29
|
+
else
|
30
|
+
puts "Type must be `post` or `user` only. `#{type}` passed in."
|
31
|
+
exit
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
class SieveGenerator
|
37
|
+
def self.generate_post_sieve(name, style)
|
38
|
+
puts "Generating post sieve named #{name} with style #{style}"
|
39
|
+
|
40
|
+
file_path = "./lib/forki/scrapers/sieves/"
|
41
|
+
test_path = "./test/sieves/"
|
42
|
+
case style
|
43
|
+
when :video
|
44
|
+
test_data_valid_path = "#{test_path}/video_sieves/test_data/video_sieve_#{name}_valid.json"
|
45
|
+
test_data_invalid_path = "#{test_path}/video_sieves/test_data/video_sieve_#{name}_invalid.json"
|
46
|
+
|
47
|
+
file_path += "video_sieves/video_sieve_#{name}.rb"
|
48
|
+
test_path += "video_sieves/video_sieve_#{name}_test.rb"
|
49
|
+
file_template = File.read("./bin/generator_templates/video_sieve_template.rb.erb")
|
50
|
+
test_file_template = File.read("./bin/generator_templates/video_sieve_test_template.rb.erb")
|
51
|
+
when :image
|
52
|
+
test_data_valid_path = "#{test_path}/image_sieves/test_data/image_sieve_#{name}_valid.json"
|
53
|
+
test_data_invalid_path = "#{test_path}/image_sieves/test_data/image_sieve_#{name}_invalid.json"
|
54
|
+
|
55
|
+
file_path += "image_sieves/image_sieve_#{name}.rb"
|
56
|
+
test_path += "image_sieves/image_sieve_#{name}_test.rb"
|
57
|
+
file_template = File.read("./bin/generator_templates/image_sieve_template.rb.erb")
|
58
|
+
test_file_template = File.read("./bin/generator_templates/image_sieve_test_template.rb.erb")
|
59
|
+
end
|
60
|
+
|
61
|
+
file_contents = ERB.new(file_template)
|
62
|
+
test_file_contents = ERB.new(test_file_template)
|
63
|
+
|
64
|
+
camel_name = name.split('_').collect(&:capitalize).join
|
65
|
+
|
66
|
+
File.write(file_path, file_contents.result(binding))
|
67
|
+
File.write(test_path, test_file_contents.result(binding))
|
68
|
+
|
69
|
+
File.write(test_data_valid_path, "")
|
70
|
+
File.write(test_data_invalid_path, "")
|
71
|
+
end
|
72
|
+
|
73
|
+
def self.generate_user_sieve(name)
|
74
|
+
puts "Generating user sieve named #{name}"
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
GenerateSieve.start(ARGV)
|
79
|
+
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# NOTE: This is not implemented yet, just here for filler
|
2
|
+
|
3
|
+
class ImageSieve<%= camel_name %> < ImageSieve
|
4
|
+
# To check if it's valid for the inputted graphql objects
|
5
|
+
def self.check(graphql_objects)
|
6
|
+
image_object = self.extractor(graphql_objects)
|
7
|
+
|
8
|
+
true
|
9
|
+
rescue StandardError
|
10
|
+
return false
|
11
|
+
end
|
12
|
+
|
13
|
+
# output the expected format of:
|
14
|
+
#
|
15
|
+
# post_details = {
|
16
|
+
# id: video_object["id"],
|
17
|
+
# num_comments: num_comments,
|
18
|
+
# num_shares: share_count_object.fetch("count", nil),
|
19
|
+
# num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
|
20
|
+
# reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
|
21
|
+
# video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
|
22
|
+
# video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
|
23
|
+
# text: text,
|
24
|
+
# created_at: creation_date,
|
25
|
+
# profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
|
26
|
+
# has_video: true
|
27
|
+
# }
|
28
|
+
# post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
|
29
|
+
# post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
|
30
|
+
# post_details[:reactions] = reaction_counts
|
31
|
+
|
32
|
+
def self.sieve(graphql_objects)
|
33
|
+
image_object = self.extractor(graphql_objects)
|
34
|
+
|
35
|
+
post_details = {
|
36
|
+
id: nil,
|
37
|
+
num_comments: nil,
|
38
|
+
num_shared: nil,
|
39
|
+
num_views: nil,
|
40
|
+
reshare_warning: nil,
|
41
|
+
video_preview_image_url: nil,
|
42
|
+
video_url: nil,
|
43
|
+
text: nil,
|
44
|
+
created_at: nil,
|
45
|
+
profile_link: nil,
|
46
|
+
has_video: false,
|
47
|
+
video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
|
48
|
+
video_file: Forki.retrieve_media(video_url),
|
49
|
+
reactions: nil
|
50
|
+
}
|
51
|
+
end
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
def self.extractor(graphql_objects)
|
56
|
+
image_objects = graphql_objects.filter do |go|
|
57
|
+
# go = go.first if go.kind_of?(Array) && !go.empty?
|
58
|
+
# go.has_key?("image")
|
59
|
+
end
|
60
|
+
|
61
|
+
# image_objects.first.dig("image", "creation_story")
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "test_helper"
|
4
|
+
|
5
|
+
# rubocop:disable Metrics/ClassLength
|
6
|
+
class ImageSieve<%= camel_name %>Test < Minitest::Test
|
7
|
+
def setup
|
8
|
+
@valid_json = JSON.parse(File.read("test/sieves/image_sieves/test_data/image_sieve_<%= name %>_valid.json"))
|
9
|
+
@invalid_json = JSON.parse(File.read("test/sieves/image_sieves/test_data/image_sieve_<%= name %>_invalid.json"))
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_sieve_properly_fails_check
|
13
|
+
assert ImageSieve<%= camel_name %>.check(@invalid_json) == false
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_sieve_properly_passes_check
|
17
|
+
assert ImageSieve<%= camel_name %>.check(@valid_json)
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_sieve_can_sieve_properly
|
21
|
+
result = ImageSieve<%= camel_name %>.sieve(@valid_json)
|
22
|
+
|
23
|
+
# TODO: Update the values for the post you're testing
|
24
|
+
# MAINTAINER TODO: FIX THIS FOR IMAGES
|
25
|
+
assert_equal "394367115960503", result[:id]
|
26
|
+
assert_equal 173, result[:num_comments]
|
27
|
+
assert_equal nil, result[:num_shared]
|
28
|
+
assert_equal nil, result[:num_views]
|
29
|
+
assert_equal false, result[:reshare_warning]
|
30
|
+
assert_not_nil result[:video_preview_image_url]
|
31
|
+
assert_not_nil result[:video_url]
|
32
|
+
assert_equal nil, text
|
33
|
+
assert_equal 1654989063, result[:created_at]
|
34
|
+
assert_equal nil, result[:profile_link]
|
35
|
+
assert_equal false, result[:has_video]
|
36
|
+
assert_not_nil result[:video_preview_image_file]
|
37
|
+
assert_not_nil result[:video_file]
|
38
|
+
assert_not_nil result[:reactions]
|
39
|
+
|
40
|
+
assert result[:reactions].kind_of?(Array)
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
class VideoSieve<%= camel_name %> < VideoSieve
|
2
|
+
# To check if it's valid for the inputted graphql objects
|
3
|
+
def self.check(graphql_objects)
|
4
|
+
video_object = self.extractor(graphql_objects)
|
5
|
+
|
6
|
+
true
|
7
|
+
rescue StandardError
|
8
|
+
return false
|
9
|
+
end
|
10
|
+
|
11
|
+
# output the expected format of:
|
12
|
+
#
|
13
|
+
# post_details = {
|
14
|
+
# id: video_object["id"],
|
15
|
+
# num_comments: num_comments,
|
16
|
+
# num_shares: share_count_object.fetch("count", nil),
|
17
|
+
# num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
|
18
|
+
# reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
|
19
|
+
# video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
|
20
|
+
# video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
|
21
|
+
# text: text,
|
22
|
+
# created_at: creation_date,
|
23
|
+
# profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
|
24
|
+
# has_video: true
|
25
|
+
# }
|
26
|
+
# post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
|
27
|
+
# post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
|
28
|
+
# post_details[:reactions] = reaction_counts
|
29
|
+
|
30
|
+
def self.sieve(graphql_objects)
|
31
|
+
video_object = self.extractor(graphql_objects)
|
32
|
+
|
33
|
+
post_details = {
|
34
|
+
id: nil,
|
35
|
+
num_comments: nil,
|
36
|
+
num_shared: nil,
|
37
|
+
num_views: nil,
|
38
|
+
reshare_warning: nil,
|
39
|
+
video_preview_image_url: nil,
|
40
|
+
video_url: nil,
|
41
|
+
text: nil,
|
42
|
+
created_at: nil,
|
43
|
+
profile_link: nil,
|
44
|
+
has_video: true,
|
45
|
+
video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
|
46
|
+
video_file: Forki.retrieve_media(video_url),
|
47
|
+
reactions: nil
|
48
|
+
}
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def self.extractor(graphql_objects)
|
54
|
+
video_objects = graphql_objects.filter do |go|
|
55
|
+
# go = go.first if go.kind_of?(Array) && !go.empty?
|
56
|
+
# go.has_key?("video")
|
57
|
+
end
|
58
|
+
|
59
|
+
# video_objects.first.dig("video", "creation_story")
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "test_helper"
|
4
|
+
|
5
|
+
# rubocop:disable Metrics/ClassLength
|
6
|
+
class VideoSieve<%= camel_name %>Test < Minitest::Test
|
7
|
+
def setup
|
8
|
+
@valid_json = JSON.parse(File.read("test/sieves/video_sieves/test_data/video_sieve_<%= name %>_valid.json"))
|
9
|
+
@invalid_json = JSON.parse(File.read("test/sieves/video_sieves/test_data/video_sieve_<%= name %>_invalid.json"))
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_sieve_properly_fails_check
|
13
|
+
assert VideoSieve<%= camel_name %>.check(@invalid_json) == false
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_sieve_properly_passes_check
|
17
|
+
assert VideoSieve<%= camel_name %>.check(@valid_json)
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_sieve_can_sieve_properly
|
21
|
+
result = VideoSieve<%= camel_name %>.sieve(@valid_json)
|
22
|
+
|
23
|
+
# TODO: Update the values for the post you're testing
|
24
|
+
assert_equal "394367115960503", result[:id]
|
25
|
+
assert_equal 173, result[:num_comments]
|
26
|
+
assert_equal nil, result[:num_shared]
|
27
|
+
assert_equal nil, result[:num_views]
|
28
|
+
assert_equal false, result[:reshare_warning]
|
29
|
+
assert_not_nil result[:video_preview_image_url]
|
30
|
+
assert_not_nil result[:video_url]
|
31
|
+
assert_equal nil, text
|
32
|
+
assert_equal 1654989063, result[:created_at]
|
33
|
+
assert_equal nil, result[:profile_link]
|
34
|
+
assert_equal true, result[:has_video]
|
35
|
+
assert_not_nil result[:video_preview_image_file]
|
36
|
+
assert_not_nil result[:video_file]
|
37
|
+
assert_not_nil result[:reactions]
|
38
|
+
|
39
|
+
assert result[:reactions].kind_of?(Array)
|
40
|
+
end
|
41
|
+
end
|
data/forki.gemspec
CHANGED
@@ -37,6 +37,8 @@ Gem::Specification.new do |spec|
|
|
37
37
|
spec.add_dependency "oj" # A faster JSON parser/loader than stdlib
|
38
38
|
spec.add_dependency "selenium-webdriver" # Webdriver selenium
|
39
39
|
|
40
|
+
spec.add_development_dependency "thor" # For the generator
|
41
|
+
|
40
42
|
# For more information and examples about making a new gem, checkout our
|
41
43
|
# guide at: https://bundler.io/guides/creating_gem.html
|
42
44
|
end
|
@@ -4,6 +4,7 @@ require "typhoeus"
|
|
4
4
|
require "securerandom"
|
5
5
|
require "byebug"
|
6
6
|
|
7
|
+
|
7
8
|
module Forki
|
8
9
|
# rubocop:disable Metrics/ClassLength
|
9
10
|
class PostScraper < Scraper
|
@@ -100,7 +101,11 @@ module Forki
|
|
100
101
|
begin
|
101
102
|
find("span", wait: 5, text: "This Video Isn't Available Anymore", exact_text: false)
|
102
103
|
rescue Capybara::ElementNotFound, Selenium::WebDriver::Error::StaleElementReferenceError
|
103
|
-
|
104
|
+
begin
|
105
|
+
find("span", wait: 5, text: "This Page Isn't Available", exact_text: false)
|
106
|
+
rescue Capybara::ElementNotFound, Selenium::WebDriver::Error::StaleElementReferenceError
|
107
|
+
return true
|
108
|
+
end
|
104
109
|
end
|
105
110
|
end
|
106
111
|
|
@@ -150,21 +155,30 @@ module Forki
|
|
150
155
|
end
|
151
156
|
|
152
157
|
graphql_object_array = graphql_strings.map { |graphql_string| JSON.parse(graphql_string) }
|
158
|
+
|
159
|
+
# Once in awhile it's really easy
|
160
|
+
video_objects = graphql_object_array.filter {|go| go.has_key?("video") }
|
161
|
+
|
162
|
+
if VideoSieve.can_process_with_sieve?(graphql_object_array)
|
163
|
+
# Eventually all of this complexity will be replaced with this
|
164
|
+
return VideoSieve.sieve_for_graphql_objects(graphql_object_array)
|
165
|
+
end
|
166
|
+
|
153
167
|
story_node_object = graphql_object_array.find { |graphql_object| graphql_object.key? "node" }&.fetch("node", nil) # user posted video
|
154
168
|
story_node_object = story_node_object || graphql_object_array.find { |graphql_object| graphql_object.key? "nodes" }&.fetch("nodes")&.first # page posted video
|
155
169
|
|
156
170
|
return extract_video_post_data_alternative(graphql_object_array) if story_node_object.nil?
|
157
171
|
|
158
172
|
if story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("media")
|
159
|
-
video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]
|
160
|
-
creation_date = video_object["publish_time"]
|
161
|
-
|
173
|
+
video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
|
174
|
+
creation_date = video_object["publish_time"] if video_object&.has_key("publish_time")
|
175
|
+
creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"] if creation_date.nil?
|
162
176
|
elsif story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("style_infos")
|
163
177
|
# For "Reels" we need a separate way to parse this
|
164
178
|
video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["style_infos"].first["fb_shorts_story"]["short_form_video_context"]["playback_video"]
|
165
179
|
creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["style_infos"].first["fb_shorts_story"]["creation_time"]
|
166
180
|
else
|
167
|
-
raise "Unable to parse video object"
|
181
|
+
raise "Unable to parse video object" if video_objects.empty?
|
168
182
|
end
|
169
183
|
|
170
184
|
feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
|
@@ -187,7 +201,7 @@ module Forki
|
|
187
201
|
num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
|
188
202
|
reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
|
189
203
|
video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
|
190
|
-
video_url: video_object["
|
204
|
+
video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
|
191
205
|
text: text,
|
192
206
|
created_at: creation_date,
|
193
207
|
profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
|
@@ -213,7 +227,7 @@ module Forki
|
|
213
227
|
num_views: feedback_object["video_view_count"],
|
214
228
|
reshare_warning: feedback_object["should_show_reshare_warning"],
|
215
229
|
video_preview_image_url: video_object["video"]["preferred_thumbnail"]["image"]["uri"],
|
216
|
-
video_url: video_object["video"]["playable_url_quality_hd"] || video_object["video"]["playable_url"],
|
230
|
+
video_url: video_object["video"]["playable_url_quality_hd"] || video_object["video"]["browser_native_hd_url"] || video_object["video"]["browser_native_sd_url"] || video_object["video"]["playable_url"],
|
217
231
|
text: sidepane_object["tahoe_sidepane_renderer"]["video"]["creation_story"]["comet_sections"]["message"]["story"]["message"]["text"],
|
218
232
|
created_at: video_object["video"]["publish_time"],
|
219
233
|
profile_link: sidepane_object["tahoe_sidepane_renderer"]["video"]["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
|
@@ -365,6 +379,7 @@ module Forki
|
|
365
379
|
def parse(url)
|
366
380
|
validate_and_load_page(url)
|
367
381
|
graphql_strings = find_graphql_data_strings(page.html)
|
382
|
+
|
368
383
|
post_data = extract_post_data(graphql_strings)
|
369
384
|
post_data[:url] = url
|
370
385
|
user_url = post_data[:profile_link]
|
@@ -394,3 +409,6 @@ module Forki
|
|
394
409
|
end
|
395
410
|
end
|
396
411
|
end
|
412
|
+
|
413
|
+
require_relative "sieves/video_sieves/video_sieve"
|
414
|
+
|
@@ -7,6 +7,7 @@ require "oj"
|
|
7
7
|
require "selenium-webdriver"
|
8
8
|
require "open-uri"
|
9
9
|
require "selenium/webdriver/remote/http/curb"
|
10
|
+
require "cgi"
|
10
11
|
|
11
12
|
options = Selenium::WebDriver::Options.chrome(exclude_switches: ["enable-automation"])
|
12
13
|
options.add_argument("--start-maximized")
|
@@ -112,18 +113,41 @@ module Forki
|
|
112
113
|
|
113
114
|
url ||= "https://www.facebook.com"
|
114
115
|
|
115
|
-
|
116
116
|
page.driver.browser.navigate.to(url) # Visit the url passed in or the facebook homepage if nothing is
|
117
117
|
|
118
118
|
# Look for "login_form" box, which throws an error if not found. So we catch it and run the rest of the tests
|
119
119
|
begin
|
120
120
|
login_form = first(id: "login_form", wait: 5)
|
121
121
|
rescue Capybara::ElementNotFound
|
122
|
-
|
122
|
+
begin
|
123
|
+
login_form = find(:xpath, '//form[@data-testid="royal_login_form"]')
|
124
|
+
rescue Capybara::ElementNotFound
|
125
|
+
return unless page.title.downcase.include?("facebook - log in")
|
126
|
+
end
|
123
127
|
end
|
124
128
|
|
125
|
-
# Since we're not logged in, let's do that
|
126
|
-
|
129
|
+
# Since we're not logged in, let's do that quickly
|
130
|
+
if login_form.nil?
|
131
|
+
page.driver.browser.navigate.to("https://www.facebook.com")
|
132
|
+
|
133
|
+
# Find the login form... again (Yes, we could extract this out, but it's only ever used
|
134
|
+
# here, so it's not worth the effort)
|
135
|
+
begin
|
136
|
+
login_form = first(id: "login_form", wait: 5)
|
137
|
+
rescue Capybara::ElementNotFound
|
138
|
+
begin
|
139
|
+
login_form = find(:xpath, '//form[@data-testid="royal_login_form"]')
|
140
|
+
rescue Capybara::ElementNotFound
|
141
|
+
return unless page.title.downcase.include?("facebook - log in")
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
if login_form.nil?
|
147
|
+
# maybe we're already logged in?
|
148
|
+
sleep(rand * 10.3)
|
149
|
+
return
|
150
|
+
end
|
127
151
|
|
128
152
|
login_form.fill_in("email", with: ENV["FACEBOOK_EMAIL"])
|
129
153
|
login_form.fill_in("pass", with: ENV["FACEBOOK_PASSWORD"])
|
@@ -154,13 +178,34 @@ module Forki
|
|
154
178
|
# If either of those two conditions are false, raises an exception
|
155
179
|
def validate_and_load_page(url)
|
156
180
|
Capybara.app_host = "https://www.facebook.com"
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
raise Forki::InvalidUrlError unless
|
181
|
+
facebook_hosts = ["facebook.com", "www.facebook.com", "web.facebook.com", "m.facebook.com"]
|
182
|
+
parsed_url = URI.parse(url)
|
183
|
+
host = parsed_url.host
|
184
|
+
raise Forki::InvalidUrlError unless facebook_hosts.include?(host)
|
185
|
+
|
186
|
+
# Replace the host with a default one to prevent redirect loops that can happen
|
187
|
+
unless parsed_url.host == "www.facebook.com"
|
188
|
+
parsed_url.host = "www.facebook.com"
|
189
|
+
url = parsed_url.to_s
|
190
|
+
end
|
191
|
+
|
192
|
+
visit "https://www.facebook.com"
|
193
|
+
login
|
194
|
+
|
161
195
|
visit url unless current_url.start_with?(url)
|
196
|
+
# # If the video is a watch page it doesn't have most of the data we want so we click on the video
|
197
|
+
# if url.include?("watch/live")
|
198
|
+
# clickable_element = find("video")
|
199
|
+
|
200
|
+
# while(clickable_element.obscured?)
|
201
|
+
# clickable_element = clickable_element.find(:xpath, "..")
|
202
|
+
# end
|
203
|
+
|
204
|
+
# clickable_element.click
|
205
|
+
# end
|
162
206
|
end
|
163
207
|
|
208
|
+
|
164
209
|
# Extracts an integer out of a string describing a number
|
165
210
|
# e.g. "4K Comments" returns 4000
|
166
211
|
# e.g. "131 Shares" returns 131
|
@@ -171,19 +216,23 @@ module Forki
|
|
171
216
|
element = element.text(:all)
|
172
217
|
end
|
173
218
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
interaction_num_text.to_i + interaction_num_text[-2].to_i * 100
|
179
|
-
elsif interaction_num_text.include?("K") # e.g. "13K"
|
180
|
-
interaction_num_text.to_i * 1000
|
181
|
-
elsif interaction_num_text.include?("M") # e.g. "13M"
|
182
|
-
interaction_num_text.to_i * 1_000_000
|
183
|
-
else # e.g. "15,443"
|
184
|
-
interaction_num_text.delete!(",")
|
185
|
-
interaction_num_text.delete(" ").to_i
|
219
|
+
# Check if there's a modifier i.e. `K` or `M` if there isn't just return the number
|
220
|
+
unless element.include?("K") || element.include?("M")
|
221
|
+
element.delete(",") # "5,456" e.g.
|
222
|
+
return element.to_i
|
186
223
|
end
|
224
|
+
|
225
|
+
modifier = element[-1]
|
226
|
+
number = element[0...-1].to_f
|
227
|
+
|
228
|
+
case modifier
|
229
|
+
when "K"
|
230
|
+
number = number * 1_000
|
231
|
+
when "M"
|
232
|
+
number = number * 1_000_000
|
233
|
+
end
|
234
|
+
|
235
|
+
number.to_i
|
187
236
|
end
|
188
237
|
end
|
189
238
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
class ImageSieve
|
2
|
+
def self.can_process_with_sieve?(graphql_objects)
|
3
|
+
!sieve_class_for_graphql_objects(graphql_objects).nil?
|
4
|
+
end
|
5
|
+
|
6
|
+
def self.sieve_for_graphql_objects(graphql_objects)
|
7
|
+
|
8
|
+
sieve = sieve_class_for_graphql_objects(graphql_objects)
|
9
|
+
return nil if sieve.nil?
|
10
|
+
|
11
|
+
sieve.sieve(graphql_objects)
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def self.sieve_class_for_graphql_objects(graphql_objects)
|
17
|
+
sieves = []
|
18
|
+
sieves.detect { |sieve| sieve.check(graphql_objects) }
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
Dir['./lib/forki/scrapers/sieves/image_sieves/*.rb'].each do |file|
|
24
|
+
require file unless file.end_with?("image_sieve.rb")
|
25
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
class VideoSieve
|
2
|
+
def self.can_process_with_sieve?(graphql_objects)
|
3
|
+
!sieve_class_for_graphql_objects(graphql_objects).nil?
|
4
|
+
end
|
5
|
+
|
6
|
+
def self.sieve_for_graphql_objects(graphql_objects)
|
7
|
+
|
8
|
+
sieve = sieve_class_for_graphql_objects(graphql_objects)
|
9
|
+
return nil if sieve.nil?
|
10
|
+
|
11
|
+
sieve.sieve(graphql_objects)
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def self.sieve_class_for_graphql_objects(graphql_objects)
|
17
|
+
sieves = [VideoSieveWatchTab, VideoSieveVideoPage]
|
18
|
+
sieves.detect { |sieve| sieve.check(graphql_objects) }
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
Dir['./lib/forki/scrapers/sieves/video_sieves/*.rb'].each do |file|
|
23
|
+
require file unless file.end_with?("video_sieve.rb")
|
24
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
class VideoSieveVideoPage < VideoSieve
|
2
|
+
# To check if it's valid for the inputted graphql objects
|
3
|
+
def self.check(graphql_objects)
|
4
|
+
story_node_object = self.extractor(graphql_objects) # This will error out
|
5
|
+
return false unless story_node_object["content"]["story"]["attachments"].first["styles"]["attachment"].has_key?("media")
|
6
|
+
|
7
|
+
true
|
8
|
+
rescue StandardError
|
9
|
+
return false
|
10
|
+
end
|
11
|
+
|
12
|
+
# output the expected format of:
|
13
|
+
#
|
14
|
+
# post_details = {
|
15
|
+
# id: video_object["id"],
|
16
|
+
# num_comments: num_comments,
|
17
|
+
# num_shares: share_count_object.fetch("count", nil),
|
18
|
+
# num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
|
19
|
+
# reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
|
20
|
+
# video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
|
21
|
+
# video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
|
22
|
+
# text: text,
|
23
|
+
# created_at: creation_date,
|
24
|
+
# profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
|
25
|
+
# has_video: true
|
26
|
+
# }
|
27
|
+
# post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
|
28
|
+
# post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
|
29
|
+
# post_details[:reactions] = reaction_counts
|
30
|
+
|
31
|
+
def self.sieve(graphql_objects)
|
32
|
+
extracted_text = self.extractor(graphql_objects)
|
33
|
+
|
34
|
+
story_object = extracted_text["content"]["story"]
|
35
|
+
video_object = extracted_text["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]
|
36
|
+
feedback_object = extracted_text["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
37
|
+
|
38
|
+
video_preview_image_url = video_object["preferred_thumbnail"]["image"]["uri"]
|
39
|
+
video_url = video_object["browser_native_hd_url"]
|
40
|
+
video_url = video_object["browser_native_sd_url"] if video_url.nil?
|
41
|
+
|
42
|
+
post_details = {
|
43
|
+
id: video_object["id"],
|
44
|
+
num_comments: feedback_object["total_comment_count"],
|
45
|
+
num_shared: feedback_object["share_count"]["count"],
|
46
|
+
num_views: nil,
|
47
|
+
reshare_warning: feedback_object["should_show_reshare_warning"],
|
48
|
+
video_preview_image_url: video_preview_image_url,
|
49
|
+
video_url: video_url,
|
50
|
+
text: story_object["message"]["text"],
|
51
|
+
created_at: video_object["publish_time"],
|
52
|
+
profile_link: story_object["actors"].first["url"],
|
53
|
+
has_video: true,
|
54
|
+
video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
|
55
|
+
video_file: Forki.retrieve_media(video_url),
|
56
|
+
reactions: feedback_object["cannot_see_top_custom_reactions"]["top_reactions"]["edges"]
|
57
|
+
}
|
58
|
+
end
|
59
|
+
|
60
|
+
private
|
61
|
+
|
62
|
+
def self.extractor(graphql_objects)
|
63
|
+
story_node_object = graphql_objects.find { |graphql_object| graphql_object.key? "node" }&.fetch("node", nil) # user posted video
|
64
|
+
story_node_object["comet_sections"]
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# This is for the "watch" tab style videos https://www.facebook.com/watch/live/?v=394367115960503

# Sieve that recognises "watch tab" video payloads and converts them into the
# post-details hash described on `sieve`.
class VideoSieveWatchTab < VideoSieve
  # Checks whether this sieve is valid for the inputted graphql objects.
  #
  # The payload qualifies when a story can be extracted from it and the first
  # entry of the story's "attachments" array is a Hash with a "media" key.
  #
  # @param graphql_objects [Array] parsed GraphQL objects (Hashes, or Arrays wrapping a Hash)
  # @return [Boolean] true when `sieve` is expected to understand the payload;
  #   any StandardError raised while probing is treated as "not a match"
  def self.check(graphql_objects)
    story = extractor(graphql_objects)
    return false if story.nil?

    attachments = story["attachments"]
    return false unless attachments.is_a?(Array) && !attachments.empty?

    first_attachment = attachments.first
    first_attachment.is_a?(Hash) && first_attachment.key?("media")
  rescue StandardError
    false
  end

  # Builds the post-details hash for a "watch tab" video.
  #
  # The output follows the expected format of:
  #
  # post_details = {
  #   id: video_object["id"],
  #   num_comments: num_comments,
  #   num_shares: share_count_object.fetch("count", nil),
  #   num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
  #   reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
  #   video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
  #   video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
  #   text: text,
  #   created_at: creation_date,
  #   profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
  #   has_video: true
  # }
  # post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
  # post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
  # post_details[:reactions] = reaction_counts
  #
  # @param graphql_objects [Array] parsed GraphQL objects (Hashes, or Arrays wrapping a Hash)
  # @return [Hash] post details keyed by symbol, in the format above
  # @raise [NoMethodError] when a required part of the payload is missing
  def self.sieve(graphql_objects)
    video_object = extractor(graphql_objects)
    media = video_object["attachments"].first["media"]

    video_url = media["browser_native_sd_url"]
    video_preview_image_url = media["preferred_thumbnail"]["image"]["uri"]

    if video_object["feedback_context"].nil?
      # Fall back to any object carrying a comment count. Only Hashes are
      # probed: calling `dig` with a String key on an Array raises TypeError.
      feedback_holder = graphql_objects.find do |go|
        go.is_a?(Hash) && !go.dig("feedback", "total_comment_count").nil?
      end
      feedback_object = feedback_holder["feedback"]
    else
      feedback_object = video_object["feedback_context"]["feedback_target_with_context"]
    end

    # `dig` keeps the fallback below reachable when the media has no "owner".
    profile_link = media.dig("owner", "url")
    if profile_link.nil?
      attachments_holder = graphql_objects.find { |go| go.is_a?(Hash) && go.key?("attachments") }
      profile_link = attachments_holder["attachments"].first["media"]["creation_story"]["comet_sections"]["title"]["story"]["actors"].first["url"]
    end

    {
      id: video_object.dig("shareable", "id") || media["id"],
      num_comments: feedback_object["total_comment_count"],
      num_shares: nil, # This is not associated with these videos in this format
      num_views: nil, # This is not associated with these videos in this format
      reshare_warning: feedback_object["should_show_reshare_warning"],
      video_preview_image_url: video_preview_image_url,
      video_url: video_url,
      text: nil, # There is no text associated with these videos
      created_at: media["publish_time"],
      profile_link: profile_link,
      has_video: true,
      video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
      video_file: Forki.retrieve_media(video_url),
      reactions: feedback_object["cannot_see_top_custom_reactions"]["top_reactions"]["edges"]
    }
  end

  # Finds the story of the first object that carries a "video" key.
  #
  # Objects wrapped in an Array are unwrapped to their first element before
  # being inspected; anything that is not a Hash is ignored.
  #
  # NOTE: a bare `private` has no effect on `def self.` methods, so this helper
  # has always been publicly callable; it is left public for compatibility.
  #
  # @param graphql_objects [Array] parsed GraphQL objects
  # @return [Object, nil] the video's "creation_story", else its "story",
  #   or nil when no object with a "video" key exists
  def self.extractor(graphql_objects)
    graphql_objects.each do |go|
      candidate = go.is_a?(Array) ? go.first : go
      next unless candidate.is_a?(Hash) && candidate.key?("video")

      story = candidate.dig("video", "creation_story")
      story = candidate.dig("video", "story") if story.nil?
      return story
    end

    nil
  end
end
|
@@ -3,10 +3,14 @@ require "typhoeus"
|
|
3
3
|
module Forki
|
4
4
|
class UserScraper < Scraper
|
5
5
|
# Finds and returns the number of people who like the current page
|
6
|
-
def find_number_of_likes
|
7
|
-
likes_pattern = /[0-9,.KM ]
|
8
|
-
|
9
|
-
|
6
|
+
# Extracts the likes count from a profile details string.
#
# @param profile_details_string [String] text searched for "<number> likes" or "<number> Likes"
# @return [Object, nil] the result of `extract_int_from_num_element` for the
#   captured number text, or nil when the string contains no likes count
def find_number_of_likes(profile_details_string)
  # The number text may contain digits, separators, spaces and K/M suffixes.
  likes_pattern = /(?<num_likes>[0-9,.KM ]+) [lL]ikes/
  likes_match = likes_pattern.match(profile_details_string)
  return nil if likes_match.nil?

  extract_int_from_num_element(likes_match[:num_likes])
end
|
11
15
|
|
12
16
|
# Finds and returns the number of people who follow the current page
|
@@ -14,8 +18,18 @@ module Forki
|
|
14
18
|
followers_pattern = /Followed by (?<num_followers>[0-9,.KM ]) people/
|
15
19
|
alt_follower_pattern = /(?<num_followers>[0-9,.KM ]+) (f|F)ollowers/
|
16
20
|
number_of_followers_match = followers_pattern.match(profile_details_string) || alt_follower_pattern.match(profile_details_string)
|
21
|
+
|
17
22
|
return nil if number_of_followers_match.nil?
|
18
|
-
|
23
|
+
|
24
|
+
number_of_followers = extract_int_from_num_element(number_of_followers_match.named_captures["num_followers"])
|
25
|
+
|
26
|
+
# Note, this is sticking around if we want to use it later
|
27
|
+
# if number_of_followers.nil?
|
28
|
+
# number_of_followers_string = JSON.parse(profile_header_str)["user"]["profile_header_renderer"]["user"]["profile_social_context"]["content"].first["text"]["text"]
|
29
|
+
# number_of_followers = extract_int_from_num_element(number_of_followers_string)
|
30
|
+
# end
|
31
|
+
|
32
|
+
number_of_followers
|
19
33
|
end
|
20
34
|
|
21
35
|
def find_number_followers_for_normal_profile(profile_followers_node)
|
@@ -61,6 +75,7 @@ module Forki
|
|
61
75
|
verified: profile_header_obj["user"]["is_verified"],
|
62
76
|
profile: profile_intro_obj ? profile_intro_obj["profile_intro_card"]["bio"]["text"] : "",
|
63
77
|
profile_image_url: profile_header_obj["user"]["profilePicLarge"]["uri"],
|
78
|
+
number_of_likes: find_number_of_likes(profile_header_str),
|
64
79
|
}
|
65
80
|
end
|
66
81
|
|
data/lib/forki/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ''
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-08-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: thor
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
description:
|
84
98
|
email:
|
85
99
|
- ''
|
@@ -99,12 +113,21 @@ files:
|
|
99
113
|
- README.md
|
100
114
|
- Rakefile
|
101
115
|
- bin/console
|
116
|
+
- bin/generate_sieve
|
117
|
+
- bin/generator_templates/image_sieve_template.rb.erb
|
118
|
+
- bin/generator_templates/image_sieve_test_template.rb.erb
|
119
|
+
- bin/generator_templates/video_sieve_template.rb.erb
|
120
|
+
- bin/generator_templates/video_sieve_test_template.rb.erb
|
102
121
|
- bin/setup
|
103
122
|
- forki.gemspec
|
104
123
|
- lib/forki.rb
|
105
124
|
- lib/forki/post.rb
|
106
125
|
- lib/forki/scrapers/post_scraper.rb
|
107
126
|
- lib/forki/scrapers/scraper.rb
|
127
|
+
- lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb
|
128
|
+
- lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
|
129
|
+
- lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb
|
130
|
+
- lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb
|
108
131
|
- lib/forki/scrapers/user_scraper.rb
|
109
132
|
- lib/forki/user.rb
|
110
133
|
- lib/forki/version.rb
|
@@ -139,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
162
|
- !ruby/object:Gem::Version
|
140
163
|
version: '0'
|
141
164
|
requirements: []
|
142
|
-
rubygems_version: 3.
|
165
|
+
rubygems_version: 3.4.14
|
143
166
|
signing_key:
|
144
167
|
specification_version: 4
|
145
168
|
summary: A gem to scrape Facebook pages for archive purposes.
|