forki 0.2.2 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7edf04a9e64d21fa773c11ff10e0af20237061015195baac334e7de90750e7d8
4
- data.tar.gz: 0bfc1308c2e54cc36c7454f8dbdc9b1776493350fdabfc1e166f2d9214a7da71
3
+ metadata.gz: f4868fd61b98809521249982e057f69d69018e8defba25792382f44e06b1921a
4
+ data.tar.gz: 1c700275aa5fafc19e4f6b42845e5d52c02e2bc3e6cd3f1082e1a59c5c4f472d
5
5
  SHA512:
6
- metadata.gz: 90582f953c5490a2d2f338ecd3463dca1ebb4cad726307202aab90a71019a16476b42c46704ce0df462d71062117e2ca27dbd95ad2db06627d3be984e5528bbe
7
- data.tar.gz: 671538d2aaeefa63fb4380252e3d167da7b65f05e1af3899f91de9ce1dd1ef53100d04dd72b868baf690e8b2187f55d2a092474a6140e3ddb35c17fe52766f66
6
+ metadata.gz: b5967db4c8e10b9d626767f041b2f59627170a49ceb4dc47e6da11f439a668bc656550e9b0c6a3a6a52180c8e75637ab2cbfb8a3c1b007bb91c4a5280e21aabd
7
+ data.tar.gz: 8230ec7b913fca196e255820066efab1c520e42ac38818652cb252966a16b8ce915f5f1224606cd5eddb81968fcdbd9f37506e028c641bcad2a402536174d321
@@ -393,7 +393,7 @@ module Forki
393
393
  sleep(5)
394
394
  end
395
395
 
396
- # page.quit # Close browser between page navigations to prevent cache folder access issues
396
+ # page.quit # Close browser between page navigation to prevent cache folder access issues
397
397
 
398
398
  post_data[:user] = User.lookup(user_url).first
399
399
  page.quit
@@ -405,7 +405,12 @@ module Forki
405
405
  rescue StandardError => e
406
406
  raise e
407
407
  ensure
408
- page.quit
408
+ # `page` may already be broken at this point, in which case we raise an error so the job is retried later
409
+ begin
410
+ page.quit
411
+ rescue Curl::Err::ConnectionFailedError
412
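+ raise Forki::RretryableError # This ensures it'll eventually be retried by Hypatia. NOTE(review): `RretryableError` looks like a typo for `RetryableError` — confirm the constant exists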
413
+ end
409
414
  end
410
415
  end
411
416
  end
@@ -0,0 +1,78 @@
1
+ class VideoSieveReel < VideoSieve
2
+ # Returns true when the object pulled out by `extractor` has a "short_form_video_context" key; returns false otherwise, including when any StandardError is raised
3
+ def self.check(graphql_objects)
4
+ video_object = self.extractor(graphql_objects)
5
+
6
+ return false unless video_object.has_key?("short_form_video_context")
7
+ true
8
+ rescue StandardError
9
+ return false
10
+ end
11
+
12
+ # Output is expected to follow the format below (NOTE(review): the reference says num_shares, but `sieve` emits num_shared — confirm which key callers read):
13
+ #
14
+ # post_details = {
15
+ # id: video_object["id"],
16
+ # num_comments: num_comments,
17
+ # num_shares: share_count_object.fetch("count", nil),
18
+ # num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
19
+ # reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
20
+ # video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
21
+ # video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
22
+ # text: text,
23
+ # created_at: creation_date,
24
+ # profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
25
+ # has_video: true
26
+ # }
27
+ # post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
28
+ # post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
29
+ # post_details[:reactions] = reaction_counts
30
+
31
+ def self.sieve(graphql_objects)
32
+ video_object = self.extractor(graphql_objects)
33
+
34
+
35
+ feedback_object = graphql_objects.filter do |go|
36
+ go = go.first if go.kind_of?(Array) && !go.empty? # rebinds only the block-local variable; filter still yields the original element. NOTE(review): an Array match would make feedback_object["feedback"] below raise TypeError
37
+ !go.dig("feedback", "top_level_comments").nil?
38
+ end.first
39
+
40
+ reels_feedback_renderer = graphql_objects.filter do |go|
41
+ go.dig("reels_feedback_renderer")
42
+ end.first
43
+
44
+ reels_feedback_renderer["reels_feedback_renderer"]["story"] # NOTE(review): the value is discarded; this line only matters if the lookup raises (e.g. no renderer object was found)
45
+ reshare_warning = video_object["short_form_video_context"]["playback_video"].dig("warning_screen_renderer", "cix_screen", "view_model", "__typename") == "OverlayWarningScreenViewModel"
46
+
47
+ video_preview_image_url = video_object["short_form_video_context"]["playback_video"]["preferred_thumbnail"]["image"]["uri"]
48
+ video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_hd_url"] || video_object["short_form_video_context"]["playback_video"]["browser_native_sd_url"] # HD URL first, SD URL as the fallback
49
+
50
+ post_details = { # last expression of the method, so this hash is the return value
51
+ id: video_object["short_form_video_context"]["video"]["id"],
52
+ num_comments: feedback_object["feedback"]["top_level_comments"]["totalCountIncludingReplies"],
53
+ num_shared: Forki::Scraper.extract_int_from_num_element(feedback_object["feedback"]["share_count_reduced"]), # NOTE(review): the reference format above calls this key num_shares — confirm
54
+ num_views: nil,
55
+ reshare_warning: reshare_warning,
56
+ video_preview_image_url: video_preview_image_url,
57
+ video_url: video_url,
58
+ text: nil, # Reels don't have text
59
+ created_at: JSON.parse(feedback_object["tracking"])["page_insights"].first[1]["post_context"]["publish_time"], # Odd but intentional: publish_time is read out of the JSON-encoded "tracking" string
60
+ profile_link: video_object["short_form_video_context"]["video_owner"]["url"],
61
+ has_video: true,
62
+ video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
63
+ video_file: Forki.retrieve_media(video_url),
64
+ reactions: nil # TODO: reactions seem to be available only on comments; look into this again
65
+ }
66
+ end
67
+
68
+ private # NOTE(review): bare `private` does not apply to `def self.` methods, so extractor below stays publicly callable (private_class_method would be needed)
69
+
70
+ def self.extractor(graphql_objects) # Returns the "creation_story" found under "video" in the first matching graphql object
71
+ video_objects = graphql_objects.filter do |go|
72
+ go = go.first if go.kind_of?(Array) && !go.empty? # rebinds only the block-local variable; the collected element is still the original one
73
+ go.has_key?("video")
74
+ end
75
+
76
+ video_objects.first.dig("video", "creation_story") # NOTE(review): raises when nothing matched (nil.dig) or when the first match is an Array; `check` relies on its rescue for that
77
+ end
78
+ end
data/lib/forki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Forki
4
- VERSION = "0.2.2"
4
+ VERSION = "0.2.4"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-03 00:00:00.000000000 Z
11
+ date: 2023-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -126,6 +126,7 @@ files:
126
126
  - lib/forki/scrapers/scraper.rb
127
127
  - lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb
128
128
  - lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
129
+ - lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb
129
130
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb
130
131
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb
131
132
  - lib/forki/scrapers/user_scraper.rb
@@ -162,7 +163,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
163
  - !ruby/object:Gem::Version
163
164
  version: '0'
164
165
  requirements: []
165
- rubygems_version: 3.4.14
166
+ rubygems_version: 3.4.20
166
167
  signing_key:
167
168
  specification_version: 4
168
169
  summary: A gem to scrape Facebook pages for archive purposes.