forki 0.2.2 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4868fd61b98809521249982e057f69d69018e8defba25792382f44e06b1921a
|
4
|
+
data.tar.gz: 1c700275aa5fafc19e4f6b42845e5d52c02e2bc3e6cd3f1082e1a59c5c4f472d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5967db4c8e10b9d626767f041b2f59627170a49ceb4dc47e6da11f439a668bc656550e9b0c6a3a6a52180c8e75637ab2cbfb8a3c1b007bb91c4a5280e21aabd
|
7
|
+
data.tar.gz: 8230ec7b913fca196e255820066efab1c520e42ac38818652cb252966a16b8ce915f5f1224606cd5eddb81968fcdbd9f37506e028c641bcad2a402536174d321
|
@@ -393,7 +393,7 @@ module Forki
|
|
393
393
|
sleep(5)
|
394
394
|
end
|
395
395
|
|
396
|
-
# page.quit # Close browser between page
|
396
|
+
# page.quit # Close browser between page navigation to prevent cache folder access issues
|
397
397
|
|
398
398
|
post_data[:user] = User.lookup(user_url).first
|
399
399
|
page.quit
|
@@ -405,7 +405,12 @@ module Forki
|
|
405
405
|
rescue StandardError => e
|
406
406
|
raise e
|
407
407
|
ensure
|
408
|
-
page.
|
408
|
+
# `page` here can be broken already. In which case we want to raise an error so it's retried later
|
409
|
+
begin
|
410
|
+
page.quit
|
411
|
+
rescue Curl::Err::ConnectionFailedError
|
412
|
+
raise Forki::RretryableError # This ensures it'll eventually be retried by Hypatia
|
413
|
+
end
|
409
414
|
end
|
410
415
|
end
|
411
416
|
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# Sieve for Facebook Reels: builds a post-details hash from the GraphQL
# objects collected for a reel.
class VideoSieveReel < VideoSieve
  # Checks whether this sieve is valid for the inputted GraphQL objects.
  #
  # @param graphql_objects [Array] GraphQL payloads; each entry is a Hash or an
  #   Array whose first element is the Hash of interest
  # @return [Boolean] true when the extracted creation story has a
  #   "short_form_video_context" key; false otherwise, including when
  #   extraction raises (e.g. no object with a "video" key was found)
  def self.check(graphql_objects)
    extractor(graphql_objects).key?("short_form_video_context")
  rescue StandardError
    false
  end

  # Builds the post details for a reel.
  #
  # The returned hash uses the same keys as the format expected from the other
  # video sieves:
  #
  #   id, num_comments, num_shares, num_views, reshare_warning,
  #   video_preview_image_url, video_url, text, created_at, profile_link,
  #   has_video, video_preview_image_file, video_file, reactions
  #
  # @param graphql_objects [Array] see {.check}
  # @return [Hash] the post details described above
  # @raise [NoMethodError] when an expected object or nested key is missing
  def self.sieve(graphql_objects)
    video_object = extractor(graphql_objects)

    # Search the unwrapped hashes so the match itself can be indexed by string
    # keys below even when the payload entry was wrapped in an array.
    feedback_object = unwrap_graphql_objects(graphql_objects).find do |go|
      !go.dig("feedback", "top_level_comments").nil?
    end

    playback_video = video_object["short_form_video_context"]["playback_video"]
    warning_typename = playback_video.dig("warning_screen_renderer", "cix_screen", "view_model", "__typename")
    reshare_warning = warning_typename == "OverlayWarningScreenViewModel"

    video_preview_image_url = playback_video["preferred_thumbnail"]["image"]["uri"]
    # Prefer the HD rendition and fall back to SD when HD is absent.
    video_url = playback_video["browser_native_hd_url"] || playback_video["browser_native_sd_url"]

    {
      id: video_object["short_form_video_context"]["video"]["id"],
      num_comments: feedback_object["feedback"]["top_level_comments"]["totalCountIncludingReplies"],
      # Key is `num_shares` (not `num_shared`) to match the documented format.
      num_shares: Forki::Scraper.extract_int_from_num_element(feedback_object["feedback"]["share_count_reduced"]),
      num_views: nil,
      reshare_warning: reshare_warning,
      video_preview_image_url: video_preview_image_url,
      video_url: video_url,
      text: nil, # Reels don't have text
      # "tracking" is a JSON string; the publish time is read from the first
      # "page_insights" entry (presumably a hash keyed by page id, so
      # `.first[1]` is that entry's value -- TODO confirm). Yea, this is weird.
      created_at: JSON.parse(feedback_object["tracking"])["page_insights"].first[1]["post_context"]["publish_time"],
      profile_link: video_object["short_form_video_context"]["video_owner"]["url"],
      has_video: true,
      video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
      video_file: Forki.retrieve_media(video_url),
      reactions: nil # Only available on comments it seems? Look into this again sometime
    }
  end

  # Finds the first GraphQL object carrying a "video" key and returns its
  # "creation_story".
  #
  # NOTE: this stays public. A bare `private` does not apply to `def self.`
  # methods, so it was already callable from outside the class.
  #
  # @param graphql_objects [Array] see {.check}
  # @return [Hash, nil] the value at "video" -> "creation_story"
  # @raise [NoMethodError] when no object has a "video" key
  def self.extractor(graphql_objects)
    video_object = unwrap_graphql_objects(graphql_objects).find { |go| go.key?("video") }
    video_object.dig("video", "creation_story")
  end

  # Normalizes the payload list: array-wrapped entries are replaced by their
  # first element, and anything that is not a Hash afterwards (empty arrays,
  # nils, scalars) is dropped so key lookups cannot blow up on them.
  def self.unwrap_graphql_objects(graphql_objects)
    graphql_objects.map do |go|
      go = go.first if go.is_a?(Array)
      go if go.is_a?(Hash)
    end.compact
  end
  private_class_method :unwrap_graphql_objects
end
|
data/lib/forki/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ''
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -126,6 +126,7 @@ files:
|
|
126
126
|
- lib/forki/scrapers/scraper.rb
|
127
127
|
- lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb
|
128
128
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
|
129
|
+
- lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb
|
129
130
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb
|
130
131
|
- lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb
|
131
132
|
- lib/forki/scrapers/user_scraper.rb
|
@@ -162,7 +163,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
162
163
|
- !ruby/object:Gem::Version
|
163
164
|
version: '0'
|
164
165
|
requirements: []
|
165
|
-
rubygems_version: 3.4.
|
166
|
+
rubygems_version: 3.4.20
|
166
167
|
signing_key:
|
167
168
|
specification_version: 4
|
168
169
|
summary: A gem to scrape Facebook pages for archive purposes.
|