forki 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f4868fd61b98809521249982e057f69d69018e8defba25792382f44e06b1921a
|
|
4
|
+
data.tar.gz: 1c700275aa5fafc19e4f6b42845e5d52c02e2bc3e6cd3f1082e1a59c5c4f472d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b5967db4c8e10b9d626767f041b2f59627170a49ceb4dc47e6da11f439a668bc656550e9b0c6a3a6a52180c8e75637ab2cbfb8a3c1b007bb91c4a5280e21aabd
|
|
7
|
+
data.tar.gz: 8230ec7b913fca196e255820066efab1c520e42ac38818652cb252966a16b8ce915f5f1224606cd5eddb81968fcdbd9f37506e028c641bcad2a402536174d321
|
|
@@ -393,7 +393,7 @@ module Forki
|
|
|
393
393
|
sleep(5)
|
|
394
394
|
end
|
|
395
395
|
|
|
396
|
-
# page.quit # Close browser between page
|
|
396
|
+
# page.quit # Close browser between page navigation to prevent cache folder access issues
|
|
397
397
|
|
|
398
398
|
post_data[:user] = User.lookup(user_url).first
|
|
399
399
|
page.quit
|
|
@@ -405,7 +405,12 @@ module Forki
|
|
|
405
405
|
rescue StandardError => e
|
|
406
406
|
raise e
|
|
407
407
|
ensure
|
|
408
|
-
page.
|
|
408
|
+
# `page` here can be broken already. In which case we want to raise an error so it's retried later
|
|
409
|
+
begin
|
|
410
|
+
page.quit
|
|
411
|
+
rescue Curl::Err::ConnectionFailedError
|
|
412
|
+
raise Forki::RretryableError # This insures it'll eventually be retried by Hypatia
|
|
413
|
+
end
|
|
409
414
|
end
|
|
410
415
|
end
|
|
411
416
|
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
class VideoSieveReel < VideoSieve
|
|
2
|
+
# To check if it's valid for the inputted graphql objects
|
|
3
|
+
def self.check(graphql_objects)
|
|
4
|
+
video_object = self.extractor(graphql_objects)
|
|
5
|
+
|
|
6
|
+
return false unless video_object.has_key?("short_form_video_context")
|
|
7
|
+
true
|
|
8
|
+
rescue StandardError
|
|
9
|
+
return false
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
# output the expected format of:
|
|
13
|
+
#
|
|
14
|
+
# post_details = {
|
|
15
|
+
# id: video_object["id"],
|
|
16
|
+
# num_comments: num_comments,
|
|
17
|
+
# num_shares: share_count_object.fetch("count", nil),
|
|
18
|
+
# num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
|
|
19
|
+
# reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
|
|
20
|
+
# video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
|
|
21
|
+
# video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
|
|
22
|
+
# text: text,
|
|
23
|
+
# created_at: creation_date,
|
|
24
|
+
# profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
|
|
25
|
+
# has_video: true
|
|
26
|
+
# }
|
|
27
|
+
# post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
|
|
28
|
+
# post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
|
|
29
|
+
# post_details[:reactions] = reaction_counts
|
|
30
|
+
|
|
31
|
+
def self.sieve(graphql_objects)
|
|
32
|
+
video_object = self.extractor(graphql_objects)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
feedback_object = graphql_objects.filter do |go|
|
|
36
|
+
go = go.first if go.kind_of?(Array) && !go.empty?
|
|
37
|
+
!go.dig("feedback", "top_level_comments").nil?
|
|
38
|
+
end.first
|
|
39
|
+
|
|
40
|
+
reels_feedback_renderer = graphql_objects.filter do |go|
|
|
41
|
+
go.dig("reels_feedback_renderer")
|
|
42
|
+
end.first
|
|
43
|
+
|
|
44
|
+
reels_feedback_renderer["reels_feedback_renderer"]["story"]
|
|
45
|
+
reshare_warning = video_object["short_form_video_context"]["playback_video"].dig("warning_screen_renderer", "cix_screen", "view_model", "__typename") == "OverlayWarningScreenViewModel"
|
|
46
|
+
|
|
47
|
+
video_preview_image_url = video_object["short_form_video_context"]["playback_video"]["preferred_thumbnail"]["image"]["uri"]
|
|
48
|
+
video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_hd_url"] || video_object["short_form_video_context"]["playback_video"]["browser_native_sd_url"]
|
|
49
|
+
|
|
50
|
+
post_details = {
|
|
51
|
+
id: video_object["short_form_video_context"]["video"]["id"],
|
|
52
|
+
num_comments: feedback_object["feedback"]["top_level_comments"]["totalCountIncludingReplies"],
|
|
53
|
+
num_shared: Forki::Scraper.extract_int_from_num_element(feedback_object["feedback"]["share_count_reduced"]),
|
|
54
|
+
num_views: nil,
|
|
55
|
+
reshare_warning: reshare_warning,
|
|
56
|
+
video_preview_image_url: video_preview_image_url,
|
|
57
|
+
video_url: video_url,
|
|
58
|
+
text: nil, # Reels don't have text
|
|
59
|
+
created_at: JSON.parse(feedback_object["tracking"])["page_insights"].first[1]["post_context"]["publish_time"], # Yea, this is weird
|
|
60
|
+
profile_link: video_object["short_form_video_context"]["video_owner"]["url"],
|
|
61
|
+
has_video: true,
|
|
62
|
+
video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
|
|
63
|
+
video_file: Forki.retrieve_media(video_url),
|
|
64
|
+
reactions: nil # Only available on comments it seems? Look into this again sometime
|
|
65
|
+
}
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
private
|
|
69
|
+
|
|
70
|
+
def self.extractor(graphql_objects)
|
|
71
|
+
video_objects = graphql_objects.filter do |go|
|
|
72
|
+
go = go.first if go.kind_of?(Array) && !go.empty?
|
|
73
|
+
go.has_key?("video")
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
video_objects.first.dig("video", "creation_story")
|
|
77
|
+
end
|
|
78
|
+
end
|
data/lib/forki/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: forki
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.2
|
|
4
|
+
version: 0.2.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- ''
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-
|
|
11
|
+
date: 2023-10-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: capybara
|
|
@@ -126,6 +126,7 @@ files:
|
|
|
126
126
|
- lib/forki/scrapers/scraper.rb
|
|
127
127
|
- lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb
|
|
128
128
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
|
|
129
|
+
- lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb
|
|
129
130
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb
|
|
130
131
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb
|
|
131
132
|
- lib/forki/scrapers/user_scraper.rb
|
|
@@ -162,7 +163,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
162
163
|
- !ruby/object:Gem::Version
|
|
163
164
|
version: '0'
|
|
164
165
|
requirements: []
|
|
165
|
-
rubygems_version: 3.4.
|
|
166
|
+
rubygems_version: 3.4.20
|
|
166
167
|
signing_key:
|
|
167
168
|
specification_version: 4
|
|
168
169
|
summary: A gem to scrape Facebook pages for archive purposes.
|