forki 0.2.1 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/bin/generator_templates/image_sieve_test_template.rb.erb +1 -1
- data/bin/generator_templates/video_sieve_test_template.rb.erb +1 -1
- data/lib/forki/scrapers/post_scraper.rb +2 -2
- data/lib/forki/scrapers/scraper.rb +1 -1
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve.rb +1 -1
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb +78 -0
- data/lib/forki/scrapers/user_scraper.rb +4 -4
- data/lib/forki/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1c8304ed1312aa0e5a7f2f8f5ad2de9a407c761fb7ab9212d66465f599caf6f
|
4
|
+
data.tar.gz: e35a13415b85b9bd57cd9028841c61a324e4011e1a449cd102fe470015ed670d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4179d91435cdc9c82c39c92a15f3df4e7e437f250eccf01e79cf9cb631c326b76badaed3e5851cc26c91713c0cdc27feba56127e8d8c647525960286efd50412
|
7
|
+
data.tar.gz: b5f8ca4461440a785d544017fc1dbc72e9edfdc1d6924717813cf7cd1bfb67fa8695ecc072d00fd476d55eee3ecedca710c107a11bbfe89e8d4b067f443040ae
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
forki (0.1
|
4
|
+
forki (0.2.1)
|
5
5
|
apparition
|
6
6
|
capybara
|
7
7
|
oj
|
@@ -127,7 +127,7 @@ GEM
|
|
127
127
|
rubocop-rails (~> 2.0)
|
128
128
|
ruby-progressbar (1.13.0)
|
129
129
|
rubyzip (2.3.2)
|
130
|
-
selenium-webdriver (4.
|
130
|
+
selenium-webdriver (4.11.0)
|
131
131
|
rexml (~> 3.2, >= 3.2.5)
|
132
132
|
rubyzip (>= 1.2.2, < 3.0)
|
133
133
|
websocket (~> 1.0)
|
data/bin/generator_templates/image_sieve_test_template.rb.erb
CHANGED
@@ -29,7 +29,7 @@ class ImageSieve<%= camel_name %>Test < Minitest::Test
|
|
29
29
|
assert_equal false, result[:reshare_warning]
|
30
30
|
assert_not_nil result[:video_preview_image_url]
|
31
31
|
assert_not_nil result[:video_url]
|
32
|
-
assert_equal nil, text
|
32
|
+
assert_equal nil, result[:text]
|
33
33
|
assert_equal 1654989063, result[:created_at]
|
34
34
|
assert_equal nil, result[:profile_link]
|
35
35
|
assert_equal false, result[:has_video]
|
data/bin/generator_templates/video_sieve_test_template.rb.erb
CHANGED
@@ -28,7 +28,7 @@ class VideoSieve<%= camel_name %>Test < Minitest::Test
|
|
28
28
|
assert_equal false, result[:reshare_warning]
|
29
29
|
assert_not_nil result[:video_preview_image_url]
|
30
30
|
assert_not_nil result[:video_url]
|
31
|
-
assert_equal nil, text
|
31
|
+
assert_equal nil, result[:text]
|
32
32
|
assert_equal 1654989063, result[:created_at]
|
33
33
|
assert_equal nil, result[:profile_link]
|
34
34
|
assert_equal true, result[:has_video]
|
data/lib/forki/scrapers/post_scraper.rb
CHANGED
@@ -15,7 +15,7 @@ module Forki
|
|
15
15
|
views_pattern = /[0-9MK, ]+Views/
|
16
16
|
spans = all("span")
|
17
17
|
views_span = spans.find { |s| s.text(:all) =~ views_pattern }
|
18
|
-
extract_int_from_num_element(views_span)
|
18
|
+
Scraper.extract_int_from_num_element(views_span)
|
19
19
|
end
|
20
20
|
|
21
21
|
def extract_post_data(graphql_strings)
|
@@ -215,7 +215,7 @@ module Forki
|
|
215
215
|
|
216
216
|
def extract_video_post_data_alternative(graphql_object_array)
|
217
217
|
sidepane_object = graphql_object_array.find { |graphql_object| graphql_object.key?("tahoe_sidepane_renderer") }
|
218
|
-
video_object = graphql_object_array.find { |graphql_object| graphql_object.
|
218
|
+
video_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("video") }
|
219
219
|
feedback_object = sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]
|
220
220
|
reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
|
221
221
|
share_count_object = feedback_object.fetch("share_count", {})
|
data/lib/forki/scrapers/scraper.rb
CHANGED
@@ -209,7 +209,7 @@ module Forki
|
|
209
209
|
# Extracts an integer out of a string describing a number
|
210
210
|
# e.g. "4K Comments" returns 4000
|
211
211
|
# e.g. "131 Shares" returns 131
|
212
|
-
def extract_int_from_num_element(element)
|
212
|
+
def self.extract_int_from_num_element(element)
|
213
213
|
return unless element
|
214
214
|
|
215
215
|
if element.class != String # if an html element was passed in
|
data/lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
CHANGED
@@ -14,7 +14,7 @@ class VideoSieve
|
|
14
14
|
private
|
15
15
|
|
16
16
|
def self.sieve_class_for_graphql_objects(graphql_objects)
|
17
|
-
sieves = [VideoSieveWatchTab, VideoSieveVideoPage]
|
17
|
+
sieves = [VideoSieveWatchTab, VideoSieveVideoPage, VideoSieveReel]
|
18
18
|
sieves.detect { |sieve| sieve.check(graphql_objects) }
|
19
19
|
end
|
20
20
|
end
|
data/lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
class VideoSieveReel < VideoSieve
|
2
|
+
# To check if it's valid for the inputted graphql objects
|
3
|
+
def self.check(graphql_objects)
|
4
|
+
video_object = self.extractor(graphql_objects)
|
5
|
+
|
6
|
+
return false unless video_object.has_key?("short_form_video_context")
|
7
|
+
true
|
8
|
+
rescue StandardError
|
9
|
+
return false
|
10
|
+
end
|
11
|
+
|
12
|
+
# output the expected format of:
|
13
|
+
#
|
14
|
+
# post_details = {
|
15
|
+
# id: video_object["id"],
|
16
|
+
# num_comments: num_comments,
|
17
|
+
# num_shares: share_count_object.fetch("count", nil),
|
18
|
+
# num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
|
19
|
+
# reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
|
20
|
+
# video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
|
21
|
+
# video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
|
22
|
+
# text: text,
|
23
|
+
# created_at: creation_date,
|
24
|
+
# profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
|
25
|
+
# has_video: true
|
26
|
+
# }
|
27
|
+
# post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
|
28
|
+
# post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
|
29
|
+
# post_details[:reactions] = reaction_counts
|
30
|
+
|
31
|
+
def self.sieve(graphql_objects)
|
32
|
+
video_object = self.extractor(graphql_objects)
|
33
|
+
|
34
|
+
|
35
|
+
feedback_object = graphql_objects.filter do |go|
|
36
|
+
go = go.first if go.kind_of?(Array) && !go.empty?
|
37
|
+
!go.dig("feedback", "top_level_comments").nil?
|
38
|
+
end.first
|
39
|
+
|
40
|
+
reels_feedback_renderer = graphql_objects.filter do |go|
|
41
|
+
go.dig("reels_feedback_renderer")
|
42
|
+
end.first
|
43
|
+
|
44
|
+
reels_feedback_renderer["reels_feedback_renderer"]["story"]
|
45
|
+
reshare_warning = video_object["short_form_video_context"]["playback_video"].dig("warning_screen_renderer", "cix_screen", "view_model", "__typename") == "OverlayWarningScreenViewModel"
|
46
|
+
|
47
|
+
video_preview_image_url = video_object["short_form_video_context"]["playback_video"]["preferred_thumbnail"]["image"]["uri"]
|
48
|
+
video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_hd_url"] || video_object["short_form_video_context"]["playback_video"]["browser_native_sd_url"]
|
49
|
+
|
50
|
+
post_details = {
|
51
|
+
id: video_object["short_form_video_context"]["video"]["id"],
|
52
|
+
num_comments: feedback_object["feedback"]["top_level_comments"]["totalCountIncludingReplies"],
|
53
|
+
num_shared: Forki::Scraper.extract_int_from_num_element(feedback_object["feedback"]["share_count_reduced"]),
|
54
|
+
num_views: nil,
|
55
|
+
reshare_warning: reshare_warning,
|
56
|
+
video_preview_image_url: video_preview_image_url,
|
57
|
+
video_url: video_url,
|
58
|
+
text: nil, # Reels don't have text
|
59
|
+
created_at: JSON.parse(feedback_object["tracking"])["page_insights"].first[1]["post_context"]["publish_time"], # Yea, this is weird
|
60
|
+
profile_link: video_object["short_form_video_context"]["video_owner"]["url"],
|
61
|
+
has_video: true,
|
62
|
+
video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
|
63
|
+
video_file: Forki.retrieve_media(video_url),
|
64
|
+
reactions: nil # Only available on comments it seems? Look into this again sometime
|
65
|
+
}
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
def self.extractor(graphql_objects)
|
71
|
+
video_objects = graphql_objects.filter do |go|
|
72
|
+
go = go.first if go.kind_of?(Array) && !go.empty?
|
73
|
+
go.has_key?("video")
|
74
|
+
end
|
75
|
+
|
76
|
+
video_objects.first.dig("video", "creation_story")
|
77
|
+
end
|
78
|
+
end
|
data/lib/forki/scrapers/user_scraper.rb
CHANGED
@@ -10,7 +10,7 @@ module Forki
|
|
10
10
|
|
11
11
|
return nil if number_of_likes_match.nil?
|
12
12
|
|
13
|
-
extract_int_from_num_element(number_of_likes_match.named_captures["num_likes"])
|
13
|
+
Scraper.extract_int_from_num_element(number_of_likes_match.named_captures["num_likes"])
|
14
14
|
end
|
15
15
|
|
16
16
|
# Finds and returns the number of people who follow the current page
|
@@ -21,12 +21,12 @@ module Forki
|
|
21
21
|
|
22
22
|
return nil if number_of_followers_match.nil?
|
23
23
|
|
24
|
-
number_of_followers = extract_int_from_num_element(number_of_followers_match.named_captures["num_followers"])
|
24
|
+
number_of_followers = Scraper.extract_int_from_num_element(number_of_followers_match.named_captures["num_followers"])
|
25
25
|
|
26
26
|
# Note, this is sticking around if we want to use it later
|
27
27
|
# if number_of_followers.nil?
|
28
28
|
# number_of_followers_string = JSON.parse(profile_header_str)["user"]["profile_header_renderer"]["user"]["profile_social_context"]["content"].first["text"]["text"]
|
29
|
-
# number_of_followers = extract_int_from_num_element(number_of_followers_string)
|
29
|
+
# number_of_followers = Scraper.extract_int_from_num_element(number_of_followers_string)
|
30
30
|
# end
|
31
31
|
|
32
32
|
number_of_followers
|
@@ -36,7 +36,7 @@ module Forki
|
|
36
36
|
followers_string = profile_followers_node["node"]["timeline_context_item"]["renderer"]["context_item"]["title"]["text"]
|
37
37
|
followers_pattern = /[0-9,]+/
|
38
38
|
number_of_followers_match = followers_pattern.match(followers_string).to_s
|
39
|
-
extract_int_from_num_element(number_of_followers_match)
|
39
|
+
Scraper.extract_int_from_num_element(number_of_followers_match)
|
40
40
|
end
|
41
41
|
|
42
42
|
# Returns a hash of details about a Facebook user profile
|
data/lib/forki/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ''
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -126,6 +126,7 @@ files:
|
|
126
126
|
- lib/forki/scrapers/scraper.rb
|
127
127
|
- lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb
|
128
128
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
|
129
|
+
- lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb
|
129
130
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb
|
130
131
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb
|
131
132
|
- lib/forki/scrapers/user_scraper.rb
|