forki 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7edf04a9e64d21fa773c11ff10e0af20237061015195baac334e7de90750e7d8
4
- data.tar.gz: 0bfc1308c2e54cc36c7454f8dbdc9b1776493350fdabfc1e166f2d9214a7da71
3
+ metadata.gz: f4868fd61b98809521249982e057f69d69018e8defba25792382f44e06b1921a
4
+ data.tar.gz: 1c700275aa5fafc19e4f6b42845e5d52c02e2bc3e6cd3f1082e1a59c5c4f472d
5
5
  SHA512:
6
- metadata.gz: 90582f953c5490a2d2f338ecd3463dca1ebb4cad726307202aab90a71019a16476b42c46704ce0df462d71062117e2ca27dbd95ad2db06627d3be984e5528bbe
7
- data.tar.gz: 671538d2aaeefa63fb4380252e3d167da7b65f05e1af3899f91de9ce1dd1ef53100d04dd72b868baf690e8b2187f55d2a092474a6140e3ddb35c17fe52766f66
6
+ metadata.gz: b5967db4c8e10b9d626767f041b2f59627170a49ceb4dc47e6da11f439a668bc656550e9b0c6a3a6a52180c8e75637ab2cbfb8a3c1b007bb91c4a5280e21aabd
7
+ data.tar.gz: 8230ec7b913fca196e255820066efab1c520e42ac38818652cb252966a16b8ce915f5f1224606cd5eddb81968fcdbd9f37506e028c641bcad2a402536174d321
@@ -393,7 +393,7 @@ module Forki
393
393
  sleep(5)
394
394
  end
395
395
 
396
- # page.quit # Close browser between page navigations to prevent cache folder access issues
396
+ # page.quit # Close browser between page navigation to prevent cache folder access issues
397
397
 
398
398
  post_data[:user] = User.lookup(user_url).first
399
399
  page.quit
@@ -405,7 +405,12 @@ module Forki
405
405
  rescue StandardError => e
406
406
  raise e
407
407
  ensure
408
- page.quit
408
+ # `page` may already be broken here, in which case we want to raise an error so it's retried later
409
+ begin
410
+ page.quit
411
+ rescue Curl::Err::ConnectionFailedError
412
+ raise Forki::RretryableError # This ensures it'll eventually be retried by Hypatia
413
+ end
409
414
  end
410
415
  end
411
416
  end
@@ -0,0 +1,78 @@
1
+ class VideoSieveReel < VideoSieve
2
+ # Checks whether this sieve is valid for the given GraphQL objects
3
+ def self.check(graphql_objects)
4
+ video_object = self.extractor(graphql_objects)
5
+
6
+ return false unless video_object.has_key?("short_form_video_context")
7
+ true
8
+ rescue StandardError
9
+ return false
10
+ end
11
+
12
+ # output the expected format of:
13
+ #
14
+ # post_details = {
15
+ # id: video_object["id"],
16
+ # num_comments: num_comments,
17
+ # num_shares: share_count_object.fetch("count", nil),
18
+ # num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
19
+ # reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
20
+ # video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
21
+ # video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
22
+ # text: text,
23
+ # created_at: creation_date,
24
+ # profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
25
+ # has_video: true
26
+ # }
27
+ # post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
28
+ # post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
29
+ # post_details[:reactions] = reaction_counts
30
+
31
+ def self.sieve(graphql_objects)
32
+ video_object = self.extractor(graphql_objects)
33
+
34
+
35
+ feedback_object = graphql_objects.filter do |go|
36
+ go = go.first if go.kind_of?(Array) && !go.empty?
37
+ !go.dig("feedback", "top_level_comments").nil?
38
+ end.first
39
+
40
+ reels_feedback_renderer = graphql_objects.filter do |go|
41
+ go.dig("reels_feedback_renderer")
42
+ end.first
43
+
44
+ reels_feedback_renderer["reels_feedback_renderer"]["story"]
45
+ reshare_warning = video_object["short_form_video_context"]["playback_video"].dig("warning_screen_renderer", "cix_screen", "view_model", "__typename") == "OverlayWarningScreenViewModel"
46
+
47
+ video_preview_image_url = video_object["short_form_video_context"]["playback_video"]["preferred_thumbnail"]["image"]["uri"]
48
+ video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_hd_url"] || video_object["short_form_video_context"]["playback_video"]["browser_native_sd_url"]
49
+
50
+ post_details = {
51
+ id: video_object["short_form_video_context"]["video"]["id"],
52
+ num_comments: feedback_object["feedback"]["top_level_comments"]["totalCountIncludingReplies"],
53
+ num_shared: Forki::Scraper.extract_int_from_num_element(feedback_object["feedback"]["share_count_reduced"]),
54
+ num_views: nil,
55
+ reshare_warning: reshare_warning,
56
+ video_preview_image_url: video_preview_image_url,
57
+ video_url: video_url,
58
+ text: nil, # Reels don't have text
59
+ created_at: JSON.parse(feedback_object["tracking"])["page_insights"].first[1]["post_context"]["publish_time"], # Yea, this is weird
60
+ profile_link: video_object["short_form_video_context"]["video_owner"]["url"],
61
+ has_video: true,
62
+ video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
63
+ video_file: Forki.retrieve_media(video_url),
64
+ reactions: nil # Only available on comments it seems? Look into this again sometime
65
+ }
66
+ end
67
+
68
+ private
69
+
70
+ def self.extractor(graphql_objects)
71
+ video_objects = graphql_objects.filter do |go|
72
+ go = go.first if go.kind_of?(Array) && !go.empty?
73
+ go.has_key?("video")
74
+ end
75
+
76
+ video_objects.first.dig("video", "creation_story")
77
+ end
78
+ end
data/lib/forki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Forki
4
- VERSION = "0.2.2"
4
+ VERSION = "0.2.4"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-03 00:00:00.000000000 Z
11
+ date: 2023-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -126,6 +126,7 @@ files:
126
126
  - lib/forki/scrapers/scraper.rb
127
127
  - lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb
128
128
  - lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
129
+ - lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb
129
130
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb
130
131
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb
131
132
  - lib/forki/scrapers/user_scraper.rb
@@ -162,7 +163,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
163
  - !ruby/object:Gem::Version
163
164
  version: '0'
164
165
  requirements: []
165
- rubygems_version: 3.4.14
166
+ rubygems_version: 3.4.20
166
167
  signing_key:
167
168
  specification_version: 4
168
169
  summary: A gem to scrape Facebook pages for archive purposes.