forki 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/bin/generator_templates/image_sieve_test_template.rb.erb +1 -1
- data/bin/generator_templates/video_sieve_test_template.rb.erb +1 -1
- data/lib/forki/scrapers/post_scraper.rb +2 -2
- data/lib/forki/scrapers/scraper.rb +1 -1
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve.rb +1 -1
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb +78 -0
- data/lib/forki/scrapers/user_scraper.rb +4 -4
- data/lib/forki/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1c8304ed1312aa0e5a7f2f8f5ad2de9a407c761fb7ab9212d66465f599caf6f
|
4
|
+
data.tar.gz: e35a13415b85b9bd57cd9028841c61a324e4011e1a449cd102fe470015ed670d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4179d91435cdc9c82c39c92a15f3df4e7e437f250eccf01e79cf9cb631c326b76badaed3e5851cc26c91713c0cdc27feba56127e8d8c647525960286efd50412
|
7
|
+
data.tar.gz: b5f8ca4461440a785d544017fc1dbc72e9edfdc1d6924717813cf7cd1bfb67fa8695ecc072d00fd476d55eee3ecedca710c107a11bbfe89e8d4b067f443040ae
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
forki (0.1
|
4
|
+
forki (0.2.1)
|
5
5
|
apparition
|
6
6
|
capybara
|
7
7
|
oj
|
@@ -127,7 +127,7 @@ GEM
|
|
127
127
|
rubocop-rails (~> 2.0)
|
128
128
|
ruby-progressbar (1.13.0)
|
129
129
|
rubyzip (2.3.2)
|
130
|
-
selenium-webdriver (4.
|
130
|
+
selenium-webdriver (4.11.0)
|
131
131
|
rexml (~> 3.2, >= 3.2.5)
|
132
132
|
rubyzip (>= 1.2.2, < 3.0)
|
133
133
|
websocket (~> 1.0)
|
@@ -29,7 +29,7 @@ class ImageSieve<%= camel_name %>Test < Minitest::Test
|
|
29
29
|
assert_equal false, result[:reshare_warning]
|
30
30
|
assert_not_nil result[:video_preview_image_url]
|
31
31
|
assert_not_nil result[:video_url]
|
32
|
-
assert_equal nil, text
|
32
|
+
assert_equal nil, result[:text]
|
33
33
|
assert_equal 1654989063, result[:created_at]
|
34
34
|
assert_equal nil, result[:profile_link]
|
35
35
|
assert_equal false, result[:has_video]
|
@@ -28,7 +28,7 @@ class VideoSieve<%= camel_name %>Test < Minitest::Test
|
|
28
28
|
assert_equal false, result[:reshare_warning]
|
29
29
|
assert_not_nil result[:video_preview_image_url]
|
30
30
|
assert_not_nil result[:video_url]
|
31
|
-
assert_equal nil, text
|
31
|
+
assert_equal nil, result[:text]
|
32
32
|
assert_equal 1654989063, result[:created_at]
|
33
33
|
assert_equal nil, result[:profile_link]
|
34
34
|
assert_equal true, result[:has_video]
|
@@ -15,7 +15,7 @@ module Forki
|
|
15
15
|
views_pattern = /[0-9MK, ]+Views/
|
16
16
|
spans = all("span")
|
17
17
|
views_span = spans.find { |s| s.text(:all) =~ views_pattern }
|
18
|
-
extract_int_from_num_element(views_span)
|
18
|
+
Scraper.extract_int_from_num_element(views_span)
|
19
19
|
end
|
20
20
|
|
21
21
|
def extract_post_data(graphql_strings)
|
@@ -215,7 +215,7 @@ module Forki
|
|
215
215
|
|
216
216
|
def extract_video_post_data_alternative(graphql_object_array)
|
217
217
|
sidepane_object = graphql_object_array.find { |graphql_object| graphql_object.key?("tahoe_sidepane_renderer") }
|
218
|
-
video_object = graphql_object_array.find { |graphql_object| graphql_object.
|
218
|
+
video_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("video") }
|
219
219
|
feedback_object = sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]
|
220
220
|
reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
|
221
221
|
share_count_object = feedback_object.fetch("share_count", {})
|
@@ -209,7 +209,7 @@ module Forki
|
|
209
209
|
# Extracts an integer out of a string describing a number
|
210
210
|
# e.g. "4K Comments" returns 4000
|
211
211
|
# e.g. "131 Shares" returns 131
|
212
|
-
def extract_int_from_num_element(element)
|
212
|
+
def self.extract_int_from_num_element(element)
|
213
213
|
return unless element
|
214
214
|
|
215
215
|
if element.class != String # if an html element was passed in
|
@@ -14,7 +14,7 @@ class VideoSieve
|
|
14
14
|
private
|
15
15
|
|
16
16
|
def self.sieve_class_for_graphql_objects(graphql_objects)
|
17
|
-
sieves = [VideoSieveWatchTab, VideoSieveVideoPage]
|
17
|
+
sieves = [VideoSieveWatchTab, VideoSieveVideoPage, VideoSieveReel]
|
18
18
|
sieves.detect { |sieve| sieve.check(graphql_objects) }
|
19
19
|
end
|
20
20
|
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
class VideoSieveReel < VideoSieve
|
2
|
+
# To check if it's valid for the inputted graphql objects
|
3
|
+
def self.check(graphql_objects)
|
4
|
+
video_object = self.extractor(graphql_objects)
|
5
|
+
|
6
|
+
return false unless video_object.has_key?("short_form_video_context")
|
7
|
+
true
|
8
|
+
rescue StandardError
|
9
|
+
return false
|
10
|
+
end
|
11
|
+
|
12
|
+
# output the expected format of:
|
13
|
+
#
|
14
|
+
# post_details = {
|
15
|
+
# id: video_object["id"],
|
16
|
+
# num_comments: num_comments,
|
17
|
+
# num_shares: share_count_object.fetch("count", nil),
|
18
|
+
# num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
|
19
|
+
# reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
|
20
|
+
# video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
|
21
|
+
# video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
|
22
|
+
# text: text,
|
23
|
+
# created_at: creation_date,
|
24
|
+
# profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
|
25
|
+
# has_video: true
|
26
|
+
# }
|
27
|
+
# post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
|
28
|
+
# post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
|
29
|
+
# post_details[:reactions] = reaction_counts
|
30
|
+
|
31
|
+
def self.sieve(graphql_objects)
|
32
|
+
video_object = self.extractor(graphql_objects)
|
33
|
+
|
34
|
+
|
35
|
+
feedback_object = graphql_objects.filter do |go|
|
36
|
+
go = go.first if go.kind_of?(Array) && !go.empty?
|
37
|
+
!go.dig("feedback", "top_level_comments").nil?
|
38
|
+
end.first
|
39
|
+
|
40
|
+
reels_feedback_renderer = graphql_objects.filter do |go|
|
41
|
+
go.dig("reels_feedback_renderer")
|
42
|
+
end.first
|
43
|
+
|
44
|
+
reels_feedback_renderer["reels_feedback_renderer"]["story"]
|
45
|
+
reshare_warning = video_object["short_form_video_context"]["playback_video"].dig("warning_screen_renderer", "cix_screen", "view_model", "__typename") == "OverlayWarningScreenViewModel"
|
46
|
+
|
47
|
+
video_preview_image_url = video_object["short_form_video_context"]["playback_video"]["preferred_thumbnail"]["image"]["uri"]
|
48
|
+
video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_hd_url"] || video_object["short_form_video_context"]["playback_video"]["browser_native_sd_url"]
|
49
|
+
|
50
|
+
post_details = {
|
51
|
+
id: video_object["short_form_video_context"]["video"]["id"],
|
52
|
+
num_comments: feedback_object["feedback"]["top_level_comments"]["totalCountIncludingReplies"],
|
53
|
+
num_shared: Forki::Scraper.extract_int_from_num_element(feedback_object["feedback"]["share_count_reduced"]),
|
54
|
+
num_views: nil,
|
55
|
+
reshare_warning: reshare_warning,
|
56
|
+
video_preview_image_url: video_preview_image_url,
|
57
|
+
video_url: video_url,
|
58
|
+
text: nil, # Reels don't have text
|
59
|
+
created_at: JSON.parse(feedback_object["tracking"])["page_insights"].first[1]["post_context"]["publish_time"], # Yea, this is weird
|
60
|
+
profile_link: video_object["short_form_video_context"]["video_owner"]["url"],
|
61
|
+
has_video: true,
|
62
|
+
video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
|
63
|
+
video_file: Forki.retrieve_media(video_url),
|
64
|
+
reactions: nil # Only available on comments it seems? Look into this again sometime
|
65
|
+
}
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
def self.extractor(graphql_objects)
|
71
|
+
video_objects = graphql_objects.filter do |go|
|
72
|
+
go = go.first if go.kind_of?(Array) && !go.empty?
|
73
|
+
go.has_key?("video")
|
74
|
+
end
|
75
|
+
|
76
|
+
video_objects.first.dig("video", "creation_story")
|
77
|
+
end
|
78
|
+
end
|
@@ -10,7 +10,7 @@ module Forki
|
|
10
10
|
|
11
11
|
return nil if number_of_likes_match.nil?
|
12
12
|
|
13
|
-
extract_int_from_num_element(number_of_likes_match.named_captures["num_likes"])
|
13
|
+
Scraper.extract_int_from_num_element(number_of_likes_match.named_captures["num_likes"])
|
14
14
|
end
|
15
15
|
|
16
16
|
# Finds and returns the number of people who follow the current page
|
@@ -21,12 +21,12 @@ module Forki
|
|
21
21
|
|
22
22
|
return nil if number_of_followers_match.nil?
|
23
23
|
|
24
|
-
number_of_followers = extract_int_from_num_element(number_of_followers_match.named_captures["num_followers"])
|
24
|
+
number_of_followers = Scraper.extract_int_from_num_element(number_of_followers_match.named_captures["num_followers"])
|
25
25
|
|
26
26
|
# Note, this is sticking around if we want to use it later
|
27
27
|
# if number_of_followers.nil?
|
28
28
|
# number_of_followers_string = JSON.parse(profile_header_str)["user"]["profile_header_renderer"]["user"]["profile_social_context"]["content"].first["text"]["text"]
|
29
|
-
# number_of_followers = extract_int_from_num_element(number_of_followers_string)
|
29
|
+
# number_of_followers = Scraper.extract_int_from_num_element(number_of_followers_string)
|
30
30
|
# end
|
31
31
|
|
32
32
|
number_of_followers
|
@@ -36,7 +36,7 @@ module Forki
|
|
36
36
|
followers_string = profile_followers_node["node"]["timeline_context_item"]["renderer"]["context_item"]["title"]["text"]
|
37
37
|
followers_pattern = /[0-9,]+/
|
38
38
|
number_of_followers_match = followers_pattern.match(followers_string).to_s
|
39
|
-
extract_int_from_num_element(number_of_followers_match)
|
39
|
+
Scraper.extract_int_from_num_element(number_of_followers_match)
|
40
40
|
end
|
41
41
|
|
42
42
|
# Returns a hash of details about a Facebook user profile
|
data/lib/forki/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ''
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -126,6 +126,7 @@ files:
|
|
126
126
|
- lib/forki/scrapers/scraper.rb
|
127
127
|
- lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb
|
128
128
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
|
129
|
+
- lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb
|
129
130
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb
|
130
131
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb
|
131
132
|
- lib/forki/scrapers/user_scraper.rb
|